0
# Data Filtering
1
2
Functions for removing or replacing sensitive data in requests and responses before recording to cassettes. VCR.py provides comprehensive filtering capabilities to sanitize sensitive information while maintaining test functionality.
3
4
## Capabilities
5
6
### Header Filtering
7
8
Functions for modifying request and response headers before recording.
9
10
```python { .api }
11
def replace_headers(request: Request, replacements: list) -> Request:
12
"""
13
Replace headers in request according to replacements list.
14
15
Args:
16
request: Request object to modify
17
replacements: List of (key, value) tuples where value can be:
18
- str: Simple replacement value
19
- None: Remove the header entirely
20
- callable: Function(key, value, request) -> str or None
21
22
Returns:
23
Request: Modified request object
24
"""
25
26
def remove_headers(request: Request, headers_to_remove: list) -> Request:
27
"""
28
Remove specified headers from request.
29
30
Args:
31
request: Request object to modify
32
headers_to_remove: List of header names to remove
33
34
Returns:
35
Request: Request with specified headers removed
36
"""
37
```
38
39
### Query Parameter Filtering
40
41
Functions for sanitizing URL query parameters.
42
43
```python { .api }
44
def replace_query_parameters(request: Request, replacements: list) -> Request:
45
"""
46
Replace or remove query parameters from request URI.
47
48
Args:
49
request: Request object to modify
50
replacements: List of (param_name, value) tuples where value can be:
51
- str: Replacement value
52
- None: Remove parameter entirely
53
- callable: Function(key, value, request) -> str or None
54
55
Returns:
56
Request: Request with modified query parameters
57
"""
58
```
59
60
### POST Data Filtering
61
62
Functions for sanitizing form data and request body parameters.
63
64
```python { .api }
65
def replace_post_data_parameters(request: Request, replacements: list) -> Request:
66
"""
67
Replace or remove POST data parameters from request body.
68
69
Args:
70
request: Request object to modify
71
replacements: List of (param_name, value) tuples where value can be:
72
- str: Replacement value
73
- None: Remove parameter entirely
74
- callable: Function(key, value, request) -> str or None
75
76
Returns:
77
Request: Request with modified POST data
78
"""
79
```
80
81
### Response Filtering
82
83
Functions for processing response content before recording.
84
85
```python { .api }
86
def decode_response(response) -> Response:
87
"""
88
Decode compressed response content (gzip, deflate).
89
90
Args:
91
response: Response object to decode
92
93
Returns:
94
Response: Response with decoded content
95
"""
96
```
97
98
## Usage Examples
99
100
### Basic Header Filtering
101
102
```python
103
import requests
import vcr
104
105
# Filter sensitive headers
106
my_vcr = vcr.VCR(
107
filter_headers=['authorization', 'x-api-key', 'cookie']
108
)
109
110
@my_vcr.use_cassette('filtered.yaml')
111
def test_with_filtered_headers():
112
# Authorization headers will be removed from recorded cassette
113
response = requests.get(
114
'https://api.example.com/data',
115
headers={'Authorization': 'Bearer secret-token'}
116
)
117
```
118
119
### Header Replacement with Custom Values
120
121
```python
122
# Replace headers with static values
123
my_vcr = vcr.VCR(
124
filter_headers=[
125
('authorization', 'Bearer REDACTED'),
126
('x-api-key', 'FILTERED'),
127
('user-agent', None) # Remove entirely
128
]
129
)
130
```
131
132
### Dynamic Header Filtering
133
134
```python
135
def sanitize_auth_header(key, value, request):
136
"""Custom function to sanitize authorization headers."""
137
if value.startswith('Bearer '):
138
return 'Bearer [FILTERED-TOKEN]'
139
elif value.startswith('Basic '):
140
return 'Basic [FILTERED-CREDENTIALS]'
141
else:
142
return '[FILTERED-AUTH]'
143
144
my_vcr = vcr.VCR(
145
filter_headers=[
146
('authorization', sanitize_auth_header),
147
('x-session-id', lambda key, value, request: 'session-redacted')
148
]
149
)
150
```
151
152
### Query Parameter Filtering
153
154
```python
155
# Remove sensitive query parameters
156
my_vcr = vcr.VCR(
157
filter_query_parameters=['api_key', 'access_token', 'session_id']
158
)
159
160
@my_vcr.use_cassette('no_secrets.yaml')
161
def test_filtered_query_params():
162
# These parameters will be removed from recorded URLs
163
response = requests.get(
164
'https://api.example.com/data?api_key=secret123&user_id=456'
165
)
166
# Recorded URL: https://api.example.com/data?user_id=456
167
```
168
169
### Query Parameter Replacement
170
171
```python
172
my_vcr = vcr.VCR(
173
filter_query_parameters=[
174
('api_key', 'REDACTED'),
175
('timestamp', lambda key, value, request: '1234567890'), # Fixed timestamp
176
('nonce', None) # Remove entirely
177
]
178
)
179
```
180
181
### POST Data Filtering
182
183
```python
184
# Filter form data parameters
185
my_vcr = vcr.VCR(
186
filter_post_data_parameters=['password', 'credit_card', 'ssn']
187
)
188
189
@my_vcr.use_cassette('safe_posts.yaml')
190
def test_filtered_post_data():
191
# Sensitive form data will be removed from recordings
192
response = requests.post(
193
'https://api.example.com/submit',
194
data={'username': 'john', 'password': 'secret123', 'email': 'john@example.com'}
195
)
196
# Recorded body: username=john&email=john%40example.com
197
```
198
199
### Custom POST Data Processing
200
201
```python
202
def mask_credit_card(key, value, request):
203
"""Mask credit card numbers but preserve format."""
204
if len(value) == 16 and value.isdigit():
205
return f"****-****-****-{value[-4:]}"
206
return value
207
208
my_vcr = vcr.VCR(
209
filter_post_data_parameters=[
210
('password', '[FILTERED]'),
211
('card_number', mask_credit_card)
212
]
213
)
214
```
215
216
### Response Content Filtering
217
218
```python
219
def filter_response_data(response):
220
"""Custom response filtering function."""
221
import json
222
223
try:
224
# Parse JSON response
225
data = json.loads(response['body']['string'])
226
227
# Remove sensitive fields
228
if 'user' in data:
229
data['user'].pop('email', None)
230
data['user'].pop('phone', None)
231
232
# Mask API keys in response
233
if 'api_keys' in data:
234
data['api_keys'] = ['[REDACTED]'] * len(data['api_keys'])
235
236
# Update response body
237
response['body']['string'] = json.dumps(data).encode('utf-8')
238
except (ValueError, KeyError, TypeError, AttributeError):
239
# Non-JSON response or missing fields - leave unchanged
240
pass
241
242
return response
243
244
my_vcr = vcr.VCR(
245
before_record_response=filter_response_data,
246
decode_compressed_response=True # Decode gzipped responses first
247
)
248
```
249
250
### Comprehensive Request Filtering
251
252
```python
253
def comprehensive_request_filter(request):
254
"""Apply multiple filtering operations to requests."""
255
import json
256
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse
257
258
# Filter headers
259
sensitive_headers = ['authorization', 'cookie', 'x-api-key']
260
for header in sensitive_headers:
261
if header in request.headers:
262
request.headers[header] = '[FILTERED]'
263
264
# Filter query parameters
265
parsed_url = urlparse(request.uri)
266
query_params = parse_qs(parsed_url.query, keep_blank_values=True)
267
268
# Remove sensitive query parameters
269
for param in ['api_key', 'access_token', 'session']:
270
query_params.pop(param, None)
271
272
# Reconstruct URL
273
new_query = urlencode(query_params, doseq=True)
274
new_url = urlunparse((
275
parsed_url.scheme, parsed_url.netloc, parsed_url.path,
276
parsed_url.params, new_query, parsed_url.fragment
277
))
278
request.uri = new_url
279
280
# Filter JSON body content
281
if request.body and request.headers.get('content-type', '').startswith('application/json'):
282
try:
283
data = json.loads(request.body)
284
# Remove sensitive fields
285
data.pop('password', None)
286
data.pop('api_secret', None)
287
request.body = json.dumps(data)
288
except (json.JSONDecodeError, TypeError):
289
pass
290
291
return request
292
293
my_vcr = vcr.VCR(before_record_request=comprehensive_request_filter)
294
```
295
296
## Advanced Filtering Patterns
297
298
### Conditional Filtering
299
300
```python
301
def smart_header_filter(key, value, request):
302
"""Apply different filtering based on request context."""
303
if request.host == 'internal-api.company.com':
304
# More permissive for internal APIs
305
return value if key.lower() != 'authorization' else '[INTERNAL-AUTH]'
306
else:
307
# Strict filtering for external APIs
308
return '[FILTERED]'
309
310
my_vcr = vcr.VCR(
311
filter_headers=[('authorization', smart_header_filter)]
312
)
313
```
314
315
### Environment-Based Filtering
316
317
```python
318
import os
319
320
def get_filter_config():
321
"""Get filtering configuration based on environment."""
322
if os.getenv('VCR_FILTER_MODE') == 'strict':
323
return {
324
'filter_headers': ['authorization', 'cookie', 'x-api-key', 'user-agent'],
325
'filter_query_parameters': ['api_key', 'token', 'session', 'timestamp'],
326
'filter_post_data_parameters': ['password', 'secret', 'key']
327
}
328
elif os.getenv('VCR_FILTER_MODE') == 'minimal':
329
return {
330
'filter_headers': ['authorization'],
331
'filter_query_parameters': ['api_key'],
332
'filter_post_data_parameters': ['password']
333
}
334
else:
335
return {}
336
337
my_vcr = vcr.VCR(**get_filter_config())
338
```
339
340
### Chain Filtering
341
342
```python
343
def create_filter_chain(*filters):
344
"""Create a chain of filter functions."""
345
def chain_filter(request_or_response):
346
result = request_or_response
347
for filter_func in filters:
348
result = filter_func(result)
349
if result is None:
350
break
351
return result
352
return chain_filter
353
354
# Individual filter functions
355
def remove_auth(request):
356
request.headers.pop('authorization', None)
357
return request
358
359
def sanitize_urls(request):
360
# Custom URL sanitization logic
361
return request
362
363
def mask_body_secrets(request):
364
# Custom body masking logic
365
return request
366
367
# Combine filters
368
combined_filter = create_filter_chain(
369
remove_auth,
370
sanitize_urls,
371
mask_body_secrets
372
)
373
374
my_vcr = vcr.VCR(before_record_request=combined_filter)
375
```
376
377
### Binary Content Handling
378
379
```python
380
def handle_binary_responses(response):
381
"""Handle binary response content appropriately."""
382
content_type = next((v[0] for k, v in response.get('headers', {}).items() if k.lower() == 'content-type' and v), '')
383
384
if content_type.startswith('image/'):
385
# Replace image data with placeholder
386
response['body']['string'] = b'[BINARY-IMAGE-DATA-REMOVED]'
387
elif content_type.startswith('application/pdf'):
388
# Replace PDF data with placeholder
389
response['body']['string'] = b'[BINARY-PDF-DATA-REMOVED]'
390
elif 'zip' in content_type or 'octet-stream' in content_type:
391
# Replace binary data with size information
392
original_size = len(response['body']['string'])
393
response['body']['string'] = f'[BINARY-DATA-{original_size}-BYTES]'.encode()
394
395
return response
396
397
my_vcr = vcr.VCR(before_record_response=handle_binary_responses)
398
```