or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

breadcrumb-system.md, context-management.md, core-client.md, data-processing.md, framework-integrations.md, index.md, logging-integration.md, transport-layer.md

docs/data-processing.md

0

# Data Processing

1

2

Raven provides a comprehensive data processing pipeline for sanitizing sensitive information, transforming data structures, and controlling what information is sent to Sentry servers.

3

4

## Capabilities

5

6

### Base Processor

7

8

Foundation class for all data processors.

9

10

```python { .api }

11

from raven.processors import Processor

12

13

class Processor:

14

def __init__(self, client):

15

"""

16

Base data processor.

17

18

Parameters:

19

- client (Client): Sentry client instance

20

"""

21

22

def process(self, data, **kwargs):

23

"""

24

Process event data.

25

26

Parameters:

27

- data (dict): Event data to process

28

- **kwargs: Additional processing options

29

30

Returns:

31

dict: Processed event data

32

"""

33

```

34

35

### Password Sanitization

36

37

Removes sensitive data like passwords and authentication tokens from event data.

38

39

```python { .api }

40

from raven.processors import SanitizePasswordsProcessor

41

42

class SanitizePasswordsProcessor(SanitizeKeysProcessor):

43

KEYS = frozenset([

44

'password', 'secret', 'passwd', 'authorization', 'api_key',

45

'apikey', 'sentry_dsn', 'access_token'

46

])

47

48

VALUES_RE = re.compile(r'^(?:\d[ -]*?){13,16}$')

49

MASK = '*' * 8

50

51

def sanitize(self, item, value):

52

"""

53

Sanitize field values, masking passwords and credit card numbers.

54

55

Parameters:

56

- item (str): Field name/key

57

- value: Field value

58

59

Returns:

60

Sanitized value with sensitive data masked

61

"""

62

63

def process(self, data, **kwargs):

64

"""

65

Remove sensitive fields from event data.

66

67

Parameters:

68

- data (dict): Event data

69

70

Returns:

71

dict: Sanitized event data

72

"""

73

```

74

75

### Key-Based Sanitization

76

77

Removes data matching configurable key patterns using regular expressions.

78

79

```python { .api }

80

from raven.processors import SanitizeKeysProcessor

81

82

class SanitizeKeysProcessor(Processor):

83

def __init__(self, client, sanitize_keys=None):

84

"""

85

Key pattern-based sanitizer.

86

87

Parameters:

88

- client (Client): Sentry client instance

89

- sanitize_keys (list): List of regex patterns for keys to sanitize

90

"""

91

92

KEYS = frozenset([

93

'password', 'secret', 'passwd', 'token', 'api_key',

94

'access_token', 'auth_token', 'credentials'

95

])

96

97

def process(self, data, **kwargs):

98

"""

99

Sanitize data based on key patterns.

100

101

Parameters:

102

- data (dict): Event data

103

104

Returns:

105

dict: Sanitized event data

106

"""

107

```

108

109

### POST Data Removal

110

111

Removes HTTP POST data from request information.

112

113

```python { .api }

114

from raven.processors import RemovePostDataProcessor

115

116

class RemovePostDataProcessor(Processor):

117

def process(self, data, **kwargs):

118

"""

119

Remove HTTP POST data from event.

120

121

Parameters:

122

- data (dict): Event data

123

124

Returns:

125

dict: Event data with POST data removed

126

"""

127

```

128

129

### Stack Locals Removal

130

131

Removes local variables from stack trace frames to reduce data size and prevent sensitive information leakage.

132

133

```python { .api }

134

from raven.processors import RemoveStackLocalsProcessor

135

136

class RemoveStackLocalsProcessor(Processor):

137

def process(self, data, **kwargs):

138

"""

139

Remove local variables from stack traces.

140

141

Parameters:

142

- data (dict): Event data

143

144

Returns:

145

dict: Event data with stack locals removed

146

"""

147

```

148

149

### Data Transformation Utilities

150

151

Core utilities for data processing and serialization.

152

153

```python { .api }

154

from raven.utils.serializer import transform, register

155

156

def transform(data, **kwargs):

157

"""

158

Transform data for serialization.

159

160

Parameters:

161

- data: Data to transform

162

- **kwargs: Transformation options

163

164

Returns:

165

Serializable data structure

166

"""

167

168

def register(type_class, serializer):

169

"""

170

Register custom serializer for data type.

171

172

Parameters:

173

- type_class (type): Data type to serialize

174

- serializer (callable): Serialization function

175

"""

176

```

177

178

## Usage Examples

179

180

### Basic Processor Configuration

181

182

```python

183

from raven import Client

184

from raven.processors import SanitizePasswordsProcessor, RemovePostDataProcessor

185

186

client = Client(

187

dsn='https://your-dsn@sentry.io/project-id',

188

processors=[

189

SanitizePasswordsProcessor,

190

RemovePostDataProcessor,

191

]

192

)

193

194

# These fields will be sanitized automatically

195

user_data = {

196

'username': 'john_doe',

197

'password': 'secret123', # Will be masked

198

'email': 'john@example.com'

199

}

200

201

client.extra_context({'user_data': user_data})

202

client.captureMessage('User login attempt')

203

```

204

205

### Custom Sanitization Keys

206

207

```python

208

from raven import Client

209

from raven.processors import SanitizeKeysProcessor

210

211

class CustomSanitizeProcessor(SanitizeKeysProcessor):
    """Key-based sanitizer with an extended list of sensitive key names."""

    KEYS = frozenset([
        'password', 'secret', 'token', 'api_key',
        'credit_card', 'ssn', 'social_security',
        'bank_account', 'routing_number',
    ])

    @property
    def sanitize_keys(self):
        """Return the key names this processor should mask."""
        # NOTE(review): raven's SanitizeKeysProcessor appears to look up its
        # key list through the ``sanitize_keys`` property (by default taken
        # from the client configuration), not from ``KEYS``. Exposing KEYS
        # here mirrors what SanitizePasswordsProcessor does; confirm against
        # the installed raven version.
        return self.KEYS

217

218

client = Client(

219

dsn='https://your-dsn@sentry.io/project-id',

220

processors=[CustomSanitizeProcessor]

221

)

222

```

223

224

### Pattern-Based Sanitization

225

226

```python

227

from raven import Client

228

from raven.processors import SanitizeKeysProcessor

229

import re

230

231

class RegexSanitizeProcessor(SanitizeKeysProcessor):
    """Mask every value whose key matches one of ``sanitize_patterns``."""

    def __init__(self, client):
        """
        Parameters:
        - client (Client): Sentry client instance
        """
        super().__init__(client)
        # Case-insensitive patterns; a key matching any of them is masked.
        self.sanitize_patterns = [
            re.compile(r'.*password.*', re.IGNORECASE),
            re.compile(r'.*secret.*', re.IGNORECASE),
            re.compile(r'.*token.*', re.IGNORECASE),
            re.compile(r'.*key.*', re.IGNORECASE),
            re.compile(r'.*auth.*', re.IGNORECASE),
        ]

    def sanitize(self, key, value):
        """
        Return ``self.MASK`` when ``key`` matches a pattern, else ``value``.

        Parameters:
        - key: Field name; may be None, bytes, or another non-text object
        - value: Field value

        Returns:
        The mask string for matching keys, otherwise ``value`` unchanged
        """
        # Values without a name cannot be matched against key patterns, and
        # passing None to a compiled pattern raises TypeError.
        if not key:
            return value

        # Compiled str patterns only accept str input, so normalise the key.
        if isinstance(key, bytes):
            key = key.decode('utf-8', 'replace')
        else:
            key = str(key)

        if any(pattern.match(key) for pattern in self.sanitize_patterns):
            return self.MASK
        return value

246

247

client = Client(

248

dsn='https://your-dsn@sentry.io/project-id',

249

processors=[RegexSanitizeProcessor]

250

)

251

```

252

253

### Custom Data Processor

254

255

```python

256

from raven import Client
from raven.processors import Processor

257

258

class EmailSanitizeProcessor(Processor):
    """Partially mask email-like strings found anywhere in the event data."""

    def process(self, data, **kwargs):
        """
        Mask email addresses in ``data``.

        Parameters:
        - data (dict): Event data
        - **kwargs: Ignored

        Returns:
        The event data with dicts and lists rebuilt and every email-like
        string replaced by ``<first char>***@<domain>``
        """
        return self._mask_emails(data)

    @classmethod
    def _mask_emails(cls, obj):
        """Recursively walk dicts and lists, masking email-like strings."""
        if isinstance(obj, dict):
            return {key: cls._mask_emails(value) for key, value in obj.items()}
        if isinstance(obj, list):
            return [cls._mask_emails(item) for item in obj]

        # Simple email detection: exactly one '@' and a dot in the domain.
        if isinstance(obj, str) and obj.count('@') == 1:
            user, _, domain = obj.partition('@')
            if '.' in domain:
                # user[:1] instead of user[0]: an empty local part such as
                # "@example.com" must not raise IndexError.
                return f"{user[:1]}***@{domain}"

        return obj

276

277

class PIISanitizeProcessor(Processor):
    """Replace SSN, credit card and phone number patterns in string values."""

    def process(self, data, **kwargs):
        """
        Scrub PII patterns from every string nested in ``data``.

        Parameters:
        - data (dict): Event data
        - **kwargs: Ignored

        Returns:
        The event data with dicts and lists rebuilt and matching substrings
        replaced by fixed placeholders
        """
        import re

        # (pattern, placeholder) pairs, applied to each string in this order:
        # SSN (XXX-XX-XXXX), credit card, then phone number.
        redactions = (
            (r'\d{3}-\d{2}-\d{4}', 'XXX-XX-XXXX'),
            (r'\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}', 'XXXX-XXXX-XXXX-XXXX'),
            (r'\(\d{3}\)\s?\d{3}-\d{4}', '(XXX) XXX-XXXX'),
        )

        def scrub(node):
            if isinstance(node, dict):
                return {name: scrub(child) for name, child in node.items()}
            if isinstance(node, list):
                return [scrub(child) for child in node]
            if isinstance(node, str):
                for pattern, placeholder in redactions:
                    node = re.sub(pattern, placeholder, node)
            return node

        return scrub(data)

300

301

client = Client(

302

dsn='https://your-dsn@sentry.io/project-id',

303

processors=[

304

EmailSanitizeProcessor,

305

PIISanitizeProcessor,

306

'raven.processors.SanitizePasswordsProcessor'

307

]

308

)

309

```

310

311

### Environment-Specific Processing

312

313

```python

314

import os

315

from raven import Client

316

from raven.processors import (

317

SanitizePasswordsProcessor,

318

RemovePostDataProcessor,

319

RemoveStackLocalsProcessor

320

)

321

322

def get_processors():
    """
    Build the processor list for the current environment.

    Returns:
    list: ``SanitizePasswordsProcessor`` always; additionally
    ``RemovePostDataProcessor`` and ``RemoveStackLocalsProcessor`` when the
    ``ENVIRONMENT`` environment variable equals ``'production'``
    """
    in_production = os.getenv('ENVIRONMENT') == 'production'

    # More aggressive sanitization in production
    production_only = [RemovePostDataProcessor, RemoveStackLocalsProcessor]

    return [SanitizePasswordsProcessor] + (production_only if in_production else [])

333

334

client = Client(

335

dsn='https://your-dsn@sentry.io/project-id',

336

processors=get_processors()

337

)

338

```

339

340

### Custom Serializer Registration

341

342

```python

343

from raven.utils.serializer import register, transform

344

from decimal import Decimal

345

import datetime

346

347

# Custom serializers for non-JSON types

348

def serialize_decimal(obj):
    """Convert ``obj`` (e.g. a ``Decimal``) to a ``float``."""
    as_float = float(obj)
    return as_float

350

351

def serialize_datetime(obj):
    """Render ``obj`` as text by calling its ``isoformat()`` method."""
    iso_text = obj.isoformat()
    return iso_text

353

354

def serialize_custom_class(obj):
    """
    Describe an arbitrary object as a plain dict.

    Parameters:
    - obj: Object to serialize

    Returns:
    dict: ``type`` (class name), ``value`` (``str(obj)``) and ``attributes``
    (instance attributes whose names do not start with an underscore)
    """
    # Objects without an instance __dict__ (classes using __slots__, builtin
    # values) would raise AttributeError on obj.__dict__; report them with no
    # attributes instead.
    instance_attrs = getattr(obj, '__dict__', {})
    return {
        'type': obj.__class__.__name__,
        'value': str(obj),
        'attributes': {
            k: v for k, v in instance_attrs.items() if not k.startswith('_')
        },
    }

360

361

# Register custom serializers

362

register(Decimal, serialize_decimal)

363

register(datetime.datetime, serialize_datetime)

364

register(MyCustomClass, serialize_custom_class)

365

366

# Now these types will be properly serialized

367

data = {

368

'price': Decimal('19.99'),

369

'timestamp': datetime.datetime.now(),

370

'custom_obj': MyCustomClass()

371

}

372

373

client.extra_context({'data': data})

374

client.captureMessage('Custom data types')

375

```

376

377

### Processor Performance Optimization

378

379

```python

380

from raven.processors import Processor

381

import time

382

383

class PerformanceTrackingProcessor(Processor):
    """Processor wrapper that records how long each ``process`` call takes."""

    # Calls slower than this many seconds are reported (100ms threshold).
    SLOW_THRESHOLD_SECONDS = 0.1

    def __init__(self, client):
        """
        Parameters:
        - client (Client): Sentry client instance
        """
        super().__init__(client)
        # One duration (in seconds) per process() call. The list is never
        # trimmed, so it grows for the lifetime of the processor.
        self.processing_times = []

    def process(self, data, **kwargs):
        """
        Process event data and record the elapsed time.

        Parameters:
        - data (dict): Event data to process
        - **kwargs: Ignored

        Returns:
        dict: Whatever ``_process_internal`` returns for ``data``
        """
        # perf_counter() is the right clock for measuring durations;
        # time.time() can jump when the system clock is adjusted.
        start_time = time.perf_counter()

        # Process data here
        processed_data = self._process_internal(data)

        processing_time = time.perf_counter() - start_time
        self.processing_times.append(processing_time)

        # Log slow processing
        if processing_time > self.SLOW_THRESHOLD_SECONDS:
            print(f"Slow data processing: {processing_time:.3f}s")

        return processed_data

    def _process_internal(self, data):
        """Hook for the actual processing logic; returns ``data`` unchanged."""
        # Your actual processing logic
        return data

406

407

class ConditionalProcessor(Processor):
    """Processor that skips sanitization for selected events."""

    def process(self, data, **kwargs):
        """
        Sanitize event data unless the event is exempt.

        Parameters:
        - data (dict): Event data
        - **kwargs: Ignored

        Returns:
        dict: ``data`` unchanged for exempt events, otherwise the result of
        ``_sanitize_data``
        """
        # Skip processing for certain event types
        if data.get('logger') == 'performance':
            return data

        # Skip for low-priority events
        if data.get('level') == 'debug':
            return data

        return self._sanitize_data(data)

    def _sanitize_data(self, data):
        """Hook for the actual sanitization logic; returns ``data`` unchanged."""
        # Defined here so process() does not fail with AttributeError on
        # every non-exempt event. Replace with your sanitization logic.
        return data

418

```