or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-configuration.md · data-filtering.md · error-handling.md · index.md · record-modes.md · request-matching.md · request-response.md · serialization.md · test-integration.md

docs/data-filtering.md

0

# Data Filtering

1

2

Functions for removing or replacing sensitive data in requests and responses before recording to cassettes. VCR.py provides comprehensive filtering capabilities to sanitize sensitive information while maintaining test functionality.

3

4

## Capabilities

5

6

### Header Filtering

7

8

Functions for modifying request and response headers before recording.

9

10

```python { .api }

11

def replace_headers(request: Request, replacements: list) -> Request:

12

"""

13

Replace headers in request according to replacements list.

14

15

Args:

16

request: Request object to modify

17

replacements: List of (key, value) tuples where value can be:

18

- str: Simple replacement value

19

- None: Remove the header entirely

20

- callable: Function(key, value, request) -> str or None

21

22

Returns:

23

Request: Modified request object

24

"""

25

26

def remove_headers(request: Request, headers_to_remove: list) -> Request:

27

"""

28

Remove specified headers from request.

29

30

Args:

31

request: Request object to modify

32

headers_to_remove: List of header names to remove

33

34

Returns:

35

Request: Request with specified headers removed

36

"""

37

```

38

39

### Query Parameter Filtering

40

41

Functions for sanitizing URL query parameters.

42

43

```python { .api }

44

def replace_query_parameters(request: Request, replacements: list) -> Request:

45

"""

46

Replace or remove query parameters from request URI.

47

48

Args:

49

request: Request object to modify

50

replacements: List of (param_name, value) tuples where value can be:

51

- str: Replacement value

52

- None: Remove parameter entirely

53

- callable: Function(key, value, request) -> str or None

54

55

Returns:

56

Request: Request with modified query parameters

57

"""

58

```

59

60

### POST Data Filtering

61

62

Functions for sanitizing form data and request body parameters.

63

64

```python { .api }

65

def replace_post_data_parameters(request: Request, replacements: list) -> Request:

66

"""

67

Replace or remove POST data parameters from request body.

68

69

Args:

70

request: Request object to modify

71

replacements: List of (param_name, value) tuples where value can be:

72

- str: Replacement value

73

- None: Remove parameter entirely

74

- callable: Function(key, value, request) -> str or None

75

76

Returns:

77

Request: Request with modified POST data

78

"""

79

```

80

81

### Response Filtering

82

83

Functions for processing response content before recording.

84

85

```python { .api }

86

def decode_response(response) -> Response:

87

"""

88

Decode compressed response content (gzip, deflate).

89

90

Args:

91

response: Response object to decode

92

93

Returns:

94

Response: Response with decoded content

95

"""

96

```

97

98

## Usage Examples

99

100

### Basic Header Filtering

101

102

```python

103

import vcr

104

105

# Filter sensitive headers

106

my_vcr = vcr.VCR(

107

filter_headers=['authorization', 'x-api-key', 'cookie']

108

)

109

110

@my_vcr.use_cassette('filtered.yaml')

111

def test_with_filtered_headers():

112

# Authorization headers will be removed from recorded cassette

113

response = requests.get(

114

'https://api.example.com/data',

115

headers={'Authorization': 'Bearer secret-token'}

116

)

117

```

118

119

### Header Replacement with Custom Values

120

121

```python

122

# Replace headers with static values

123

my_vcr = vcr.VCR(

124

filter_headers=[

125

('authorization', 'Bearer REDACTED'),

126

('x-api-key', 'FILTERED'),

127

('user-agent', None) # Remove entirely

128

]

129

)

130

```

131

132

### Dynamic Header Filtering

133

134

```python

135

def sanitize_auth_header(key, value, request):

136

"""Custom function to sanitize authorization headers."""

137

if value.startswith('Bearer '):

138

return 'Bearer [FILTERED-TOKEN]'

139

elif value.startswith('Basic '):

140

return 'Basic [FILTERED-CREDENTIALS]'

141

else:

142

return '[FILTERED-AUTH]'

143

144

my_vcr = vcr.VCR(

145

filter_headers=[

146

('authorization', sanitize_auth_header),

147

('x-session-id', lambda k, v, r: 'session-redacted')

148

]

149

)

150

```

151

152

### Query Parameter Filtering

153

154

```python

155

# Remove sensitive query parameters

156

my_vcr = vcr.VCR(

157

filter_query_parameters=['api_key', 'access_token', 'session_id']

158

)

159

160

@my_vcr.use_cassette('no_secrets.yaml')

161

def test_filtered_query_params():

162

# These parameters will be removed from recorded URLs

163

response = requests.get(

164

'https://api.example.com/data?api_key=secret123&user_id=456'

165

)

166

# Recorded URL: https://api.example.com/data?user_id=456

167

```

168

169

### Query Parameter Replacement

170

171

```python

172

my_vcr = vcr.VCR(

173

filter_query_parameters=[

174

('api_key', 'REDACTED'),

175

('timestamp', lambda k, v, r: '1234567890'), # Fixed timestamp

176

('nonce', None) # Remove entirely

177

]

178

)

179

```

180

181

### POST Data Filtering

182

183

```python

184

# Filter form data parameters

185

my_vcr = vcr.VCR(

186

filter_post_data_parameters=['password', 'credit_card', 'ssn']

187

)

188

189

@my_vcr.use_cassette('safe_posts.yaml')

190

def test_filtered_post_data():

191

# Sensitive form data will be removed from recordings

192

response = requests.post(

193

'https://api.example.com/submit',

194

data={'username': 'john', 'password': 'secret123', 'email': 'john@example.com'}

195

)

196

# Recorded data: {'username': 'john', 'email': 'john@example.com'}

197

```

198

199

### Custom POST Data Processing

200

201

```python

202

def mask_credit_card(key, value, request):

203

"""Mask credit card numbers but preserve format."""

204

if len(value) == 16 and value.isdigit():

205

return f"****-****-****-{value[-4:]}"

206

return value

207

208

my_vcr = vcr.VCR(

209

filter_post_data_parameters=[

210

('password', '[FILTERED]'),

211

('card_number', mask_credit_card)

212

]

213

)

214

```

215

216

### Response Content Filtering

217

218

```python

219

def filter_response_data(response):

220

"""Custom response filtering function."""

221

import json

222

223

try:

224

# Parse JSON response

225

data = json.loads(response['body']['string'])

226

227

# Remove sensitive fields

228

if 'user' in data:

229

data['user'].pop('email', None)

230

data['user'].pop('phone', None)

231

232

# Mask API keys in response

233

if 'api_keys' in data:

234

data['api_keys'] = ['[REDACTED]'] * len(data['api_keys'])

235

236

# Update response body

237

response['body']['string'] = json.dumps(data)

238

except (json.JSONDecodeError, KeyError, TypeError):

239

# Non-JSON response or missing fields - leave unchanged

240

pass

241

242

return response

243

244

my_vcr = vcr.VCR(

245

before_record_response=filter_response_data,

246

decode_compressed_response=True # Decode gzipped responses first

247

)

248

```

249

250

### Comprehensive Request Filtering

251

252

```python

253

def comprehensive_request_filter(request):

254

"""Apply multiple filtering operations to requests."""

255

import json

256

from urllib.parse import urlparse, parse_qs, urlencode, urlunparse

257

258

# Filter headers

259

sensitive_headers = ['authorization', 'cookie', 'x-api-key']

260

for header in sensitive_headers:

261

if header in request.headers:

262

request.headers[header] = '[FILTERED]'

263

264

# Filter query parameters

265

parsed_url = urlparse(request.uri)

266

query_params = parse_qs(parsed_url.query)

267

268

# Remove sensitive query parameters

269

for param in ['api_key', 'access_token', 'session']:

270

query_params.pop(param, None)

271

272

# Reconstruct URL

273

new_query = urlencode({k: v[0] for k, v in query_params.items()})

274

new_url = urlunparse((

275

parsed_url.scheme, parsed_url.netloc, parsed_url.path,

276

parsed_url.params, new_query, parsed_url.fragment

277

))

278

request.uri = new_url

279

280

# Filter JSON body content

281

if request.body and request.headers.get('content-type', '').startswith('application/json'):

282

try:

283

data = json.loads(request.body)

284

# Remove sensitive fields

285

data.pop('password', None)

286

data.pop('api_secret', None)

287

request.body = json.dumps(data)

288

except (json.JSONDecodeError, TypeError):

289

pass

290

291

return request

292

293

my_vcr = vcr.VCR(before_record_request=comprehensive_request_filter)

294

```

295

296

## Advanced Filtering Patterns

297

298

### Conditional Filtering

299

300

```python

301

def smart_header_filter(key, value, request):

302

"""Apply different filtering based on request context."""

303

if request.host == 'internal-api.company.com':

304

# More permissive for internal APIs

305

return value if key.lower() != 'authorization' else '[INTERNAL-AUTH]'

306

else:

307

# Strict filtering for external APIs

308

return '[FILTERED]'

309

310

my_vcr = vcr.VCR(

311

filter_headers=[('authorization', smart_header_filter)]

312

)

313

```

314

315

### Environment-Based Filtering

316

317

```python

318

import os

319

320

def get_filter_config():

321

"""Get filtering configuration based on environment."""

322

if os.getenv('VCR_FILTER_MODE') == 'strict':

323

return {

324

'filter_headers': ['authorization', 'cookie', 'x-api-key', 'user-agent'],

325

'filter_query_parameters': ['api_key', 'token', 'session', 'timestamp'],

326

'filter_post_data_parameters': ['password', 'secret', 'key']

327

}

328

elif os.getenv('VCR_FILTER_MODE') == 'minimal':

329

return {

330

'filter_headers': ['authorization'],

331

'filter_query_parameters': ['api_key'],

332

'filter_post_data_parameters': ['password']

333

}

334

else:

335

return {}

336

337

my_vcr = vcr.VCR(**get_filter_config())

338

```

339

340

### Chain Filtering

341

342

```python

343

def create_filter_chain(*filters):

344

"""Create a chain of filter functions."""

345

def chain_filter(request_or_response):

346

result = request_or_response

347

for filter_func in filters:

348

result = filter_func(result)

349

if result is None:

350

break

351

return result

352

return chain_filter

353

354

# Individual filter functions

355

def remove_auth(request):

356

request.headers.pop('authorization', None)

357

return request

358

359

def sanitize_urls(request):

360

# Custom URL sanitization logic

361

return request

362

363

def mask_body_secrets(request):

364

# Custom body masking logic

365

return request

366

367

# Combine filters

368

combined_filter = create_filter_chain(

369

remove_auth,

370

sanitize_urls,

371

mask_body_secrets

372

)

373

374

my_vcr = vcr.VCR(before_record_request=combined_filter)

375

```

376

377

### Binary Content Handling

378

379

```python

380

def handle_binary_responses(response):

381

"""Handle binary response content appropriately."""

382

content_type = response.get('headers', {}).get('content-type', [''])[0]

383

384

if content_type.startswith('image/'):

385

# Replace image data with placeholder

386

response['body']['string'] = b'[BINARY-IMAGE-DATA-REMOVED]'

387

elif content_type.startswith('application/pdf'):

388

# Replace PDF data with placeholder

389

response['body']['string'] = b'[BINARY-PDF-DATA-REMOVED]'

390

elif 'zip' in content_type or 'octet-stream' in content_type:

391

# Replace binary data with size information

392

original_size = len(response['body']['string'])

393

response['body']['string'] = f'[BINARY-DATA-{original_size}-BYTES]'.encode()

394

395

return response

396

397

my_vcr = vcr.VCR(before_record_response=handle_binary_responses)

398

```