0
# Instrumentation and Hooks
1
2
Comprehensive instrumentation system providing observability into retry behavior through hooks. Includes built-in integrations for logging, Prometheus metrics, and structured logging, plus support for custom hooks and context managers.
3
4
## Capabilities
5
6
### Hook Management
7
8
Functions for configuring and managing retry hooks that are called when retries are scheduled.
9
10
```python { .api }
11
def set_on_retry_hooks(
12
hooks: Iterable[RetryHook | RetryHookFactory] | None
13
) -> None:
14
"""
15
Set hooks that are called after a retry has been scheduled.
16
17
Parameters:
18
- hooks: Iterable of RetryHook or RetryHookFactory instances, or None
19
20
Behavior:
21
- None: Reset to default hooks (logging + metrics if available)
22
- Empty iterable: Disable all instrumentation
23
- Hook instances: Use provided hooks
24
"""
25
26
def get_on_retry_hooks() -> tuple[RetryHook, ...]:
27
"""
28
Get hooks that are called after a retry has been scheduled.
29
30
Returns:
31
tuple[RetryHook, ...]: Currently active hooks
32
33
Note: Calling this function initializes any RetryHookFactory instances
34
that haven't been initialized yet.
35
"""
36
```
37
38
**Usage Examples:**
39
40
```python
41
import stamina
42
from stamina.instrumentation import (
43
set_on_retry_hooks,
44
get_on_retry_hooks,
45
LoggingOnRetryHook,
46
PrometheusOnRetryHook
47
)
48
49
# Use only logging, disable metrics
50
set_on_retry_hooks([LoggingOnRetryHook])
51
52
# Use custom hooks
53
def custom_hook(details):
54
print(f"Retry {details.retry_num} for {details.name}: {details.caused_by}")
55
56
set_on_retry_hooks([custom_hook, LoggingOnRetryHook])
57
58
# Disable all instrumentation
59
set_on_retry_hooks([])
60
61
# Reset to defaults
62
set_on_retry_hooks(None)
63
64
# Check current hooks
65
current_hooks = get_on_retry_hooks()
66
print(f"Active hooks: {len(current_hooks)}")
67
```
68
69
### Retry Hook Protocol
70
71
The `RetryHook` protocol defines the interface for retry hooks.
72
73
```python { .api }
74
class RetryHook(Protocol):
75
"""
76
Protocol for retry hook callables.
77
78
Hooks are called after an attempt has failed and a retry has been scheduled.
79
"""
80
81
def __call__(
82
self, details: RetryDetails
83
) -> None | AbstractContextManager[None]:
84
"""
85
Handle retry event.
86
87
Parameters:
88
- details: RetryDetails instance with retry information
89
90
Returns:
91
- None: Simple hook that performs logging/metrics/etc
92
- AbstractContextManager: Context manager entered when retry is
93
scheduled and exited before the retry attempt
94
"""
95
```
96
97
**Custom Hook Examples:**
98
99
```python
100
from stamina.instrumentation import RetryDetails, set_on_retry_hooks
101
import logging
102
103
# Simple logging hook
104
def simple_logger(details: RetryDetails) -> None:
105
"""Log retry attempts to standard logger."""
106
logging.warning(
107
f"Retrying {details.name} (attempt {details.retry_num}): {details.caused_by}"
108
)
109
110
# Hook with context manager
111
class MetricsHook:
112
def __init__(self):
113
self.retry_counts = {}
114
115
def __call__(self, details: RetryDetails):
116
# Count retries
117
self.retry_counts[details.name] = self.retry_counts.get(details.name, 0) + 1
118
119
# Return a context manager that times the backoff wait before the next attempt
120
return self._time_context(details)
121
122
def _time_context(self, details):
123
import time
124
import contextlib
125
126
@contextlib.contextmanager
127
def timer():
128
start = time.time()
129
try:
130
yield
131
finally:
132
duration = time.time() - start
133
print(f"Retry {details.retry_num} took {duration:.2f}s")
134
135
return timer()
136
137
# Use custom hooks
138
metrics_hook = MetricsHook()
139
set_on_retry_hooks([simple_logger, metrics_hook])
140
```
141
142
### Retry Details Data Class
143
144
The `RetryDetails` class provides comprehensive information about retry attempts.
145
146
```python { .api }
147
@dataclass(frozen=True)
148
class RetryDetails:
149
"""
150
Details about a retry attempt passed to RetryHook instances.
151
152
All times are in seconds as float values.
153
"""
154
155
name: str # Name of callable being retried
156
args: tuple[object, ...] # Positional arguments passed to callable
157
kwargs: dict[str, object] # Keyword arguments passed to callable
158
retry_num: int # Retry attempt number (starts at 1 after first failure)
159
wait_for: float # Seconds to wait before next attempt
160
waited_so_far: float # Total seconds waited so far for this callable
161
caused_by: Exception # Exception that triggered this retry attempt
162
```
163
164
**Usage Examples:**
165
166
```python
167
def detailed_hook(details: RetryDetails) -> None:
168
"""Hook that logs comprehensive retry information."""
169
print(f"""
170
Retry Event:
171
Function: {details.name}
172
Attempt: {details.retry_num}
173
Error: {type(details.caused_by).__name__}: {details.caused_by}
174
Next wait: {details.wait_for:.2f}s
175
Total waited: {details.waited_so_far:.2f}s
176
Args: {details.args}
177
Kwargs: {details.kwargs}
178
""")
179
180
# Hook that makes decisions based on retry details
181
def adaptive_hook(details: RetryDetails) -> None:
182
"""Hook with different behavior based on retry context."""
183
if details.retry_num == 1:
184
# First retry - log at info level
185
logging.info(f"First retry for {details.name}")
186
elif details.retry_num >= 5:
187
# Many retries - escalate to error level
188
logging.error(f"Multiple retries ({details.retry_num}) for {details.name}")
189
# Could trigger alerts, circuit breakers, etc.
190
191
# Adjust behavior based on wait time
192
if details.wait_for > 30:
193
logging.warning(f"Long wait ({details.wait_for}s) for {details.name}")
194
```
195
196
### Retry Hook Factory
197
198
The `RetryHookFactory` class enables delayed initialization of hooks, useful for expensive imports or setup.
199
200
```python { .api }
201
@dataclass(frozen=True)
202
class RetryHookFactory:
203
"""
204
Wraps a callable that returns a RetryHook.
205
206
Factories are called on the first scheduled retry to allow
207
delayed initialization of expensive resources.
208
"""
209
210
hook_factory: Callable[[], RetryHook]
211
```
212
213
**Usage Examples:**
214
215
```python
216
from stamina.instrumentation import RetryDetails, RetryHookFactory, set_on_retry_hooks
217
import functools
218
219
# Factory for expensive imports
220
def create_prometheus_hook():
221
"""Factory that delays prometheus import."""
222
import prometheus_client # Expensive import
223
224
counter = prometheus_client.Counter(
225
'my_app_retries_total',
226
'Total retries',
227
['function_name', 'error_type']
228
)
229
230
def prometheus_hook(details: RetryDetails) -> None:
231
counter.labels(
232
function_name=details.name,
233
error_type=type(details.caused_by).__name__
234
).inc()
235
236
return prometheus_hook
237
238
# Create factory
239
prometheus_factory = RetryHookFactory(create_prometheus_hook)
240
241
# Factory with configuration
242
def create_database_hook(connection_string: str):
243
"""Factory that creates database logging hook."""
244
def init_hook():
245
import database_lib # Import when needed
246
conn = database_lib.connect(connection_string)
247
248
def db_hook(details: RetryDetails) -> None:
249
conn.execute(
250
"INSERT INTO retry_log (name, attempt, error) VALUES (?, ?, ?)",
251
(details.name, details.retry_num, str(details.caused_by))
252
)
253
254
return db_hook
255
256
return init_hook
257
258
# Bind the configuration by calling the outer function; it returns the zero-argument factory
259
db_factory = RetryHookFactory(
260
create_database_hook("postgresql://localhost/logs")
261
)
262
263
# Set factories as hooks
264
set_on_retry_hooks([prometheus_factory, db_factory])
265
```
266
267
### Built-in Hook Integrations
268
269
Stamina provides pre-built integrations for common observability tools.
270
271
```python { .api }
272
# Built-in hook factory instances
273
LoggingOnRetryHook: RetryHookFactory # Standard library logging integration
274
StructlogOnRetryHook: RetryHookFactory # Structlog integration
275
PrometheusOnRetryHook: RetryHookFactory # Prometheus metrics integration
276
277
# Prometheus utility function
278
def get_prometheus_counter() -> Counter | None:
279
"""
280
Get the Prometheus counter for retry metrics.
281
282
Returns:
283
prometheus_client.Counter or None if not active
284
285
The counter has labels: callable, retry_num, error_type
286
"""
287
```
288
289
**Built-in Integration Examples:**
290
291
```python
292
from stamina.instrumentation import (
293
LoggingOnRetryHook,
294
StructlogOnRetryHook,
295
PrometheusOnRetryHook,
296
get_prometheus_counter
297
)
298
299
# Standard logging (active by default if structlog unavailable)
300
set_on_retry_hooks([LoggingOnRetryHook])
301
302
# Structured logging (active by default if structlog available)
303
set_on_retry_hooks([StructlogOnRetryHook])
304
305
# Prometheus metrics (active by default if prometheus-client available)
306
set_on_retry_hooks([PrometheusOnRetryHook])
307
308
# Combine multiple integrations
309
set_on_retry_hooks([
310
StructlogOnRetryHook,
311
PrometheusOnRetryHook
312
])
313
314
# Access Prometheus counter for custom queries
315
counter = get_prometheus_counter()
316
if counter:
317
# Get retry count for specific function
318
retry_count = counter.labels(
319
callable="my_function",
320
retry_num="1",
321
error_type="ConnectionError"
322
)._value.get()  # NOTE: _value is private prometheus_client API and may change between versions
323
print(f"Retry count: {retry_count}")
324
```
325
326
### Default Hook Behavior
327
328
Stamina automatically configures hooks based on available dependencies:
329
330
1. **Prometheus integration**: Active if `prometheus-client` is installed
331
2. **Structured logging**: Active if `structlog` is installed
332
3. **Standard logging**: Active if `structlog` is NOT installed
333
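4. **Custom hooks**: Never configured automatically. `set_on_retry_hooks` replaces the active set, so list the built-in hooks explicitly to keep them alongside your own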
334
335
```python
336
# Check what hooks are active by default
337
from stamina.instrumentation import get_on_retry_hooks
338
339
default_hooks = get_on_retry_hooks()
340
for hook in default_hooks:
341
print(f"Default hook: {getattr(hook, '__name__', type(hook).__name__)}")
342
343
# Example output might be:
344
# Default hook: prometheus_hook
345
# Default hook: structlog_hook
346
```
347
348
## Advanced Instrumentation Patterns
349
350
### Conditional Hooks
351
352
Create hooks that activate based on conditions:
353
354
```python
355
class ConditionalHook:
356
def __init__(self, condition_func, hook_func):
357
self.condition = condition_func
358
self.hook = hook_func
359
360
def __call__(self, details: RetryDetails) -> None:
361
if self.condition(details):
362
self.hook(details)
363
364
# Only log retries for specific functions
365
def should_log(details):
366
return details.name.startswith("critical_")
367
368
conditional_logger = ConditionalHook(
369
should_log,
370
lambda details: print(f"Critical function retry: {details.name}")
371
)
372
373
set_on_retry_hooks([conditional_logger])
374
```
375
376
### Hook Composition
377
378
Combine multiple hooks into composite hooks:
379
380
```python
381
class CompositeHook:
382
def __init__(self, *hooks):
383
self.hooks = hooks
384
385
def __call__(self, details: RetryDetails) -> None:
386
for hook in self.hooks:
387
try:
388
hook(details)
389
except Exception as e:
390
# Log hook errors but don't fail retries
391
logging.error(f"Hook {hook} failed: {e}")
392
393
# Combine logging and metrics
394
composite = CompositeHook(
395
lambda d: logging.info(f"Retry: {d.name}"),
396
lambda d: metrics.increment(f"retry.{d.name}"),
397
lambda d: alert_if_many_retries(d)
398
)
399
400
set_on_retry_hooks([composite])
401
```
402
403
### Context Manager Hooks
404
405
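Use context manager hooks for resource management. The returned context manager is entered when the retry is scheduled and exited before the retry attempt runs, so it spans the backoff wait rather than the attempt itself: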
406
407
```python
408
import contextlib
409
import time
410
411
class TimingContextHook:
412
def __call__(self, details: RetryDetails):
413
return self._create_timing_context(details)
414
415
@contextlib.contextmanager
416
def _create_timing_context(self, details):
417
start_time = time.time()
418
print(f"Starting retry {details.retry_num} for {details.name}")
419
420
try:
421
yield
422
finally:
423
duration = time.time() - start_time
424
print(f"Retry {details.retry_num} completed in {duration:.2f}s")
425
426
# Hook that manages database connections
427
class DatabaseContextHook:
428
def __init__(self, connection_pool):
429
self.pool = connection_pool
430
431
def __call__(self, details: RetryDetails):
432
return self._db_context(details)
433
434
@contextlib.contextmanager
435
def _db_context(self, details):
436
conn = self.pool.get_connection()
437
try:
438
# Log retry start
439
conn.execute(
440
"INSERT INTO retry_events (name, attempt, status) VALUES (?, ?, 'started')",
441
(details.name, details.retry_num)
442
)
443
yield
444
# Backoff wait finished without error (the retry attempt itself runs after this context exits)
445
conn.execute(
446
"UPDATE retry_events SET status='completed' WHERE name=? AND attempt=?",
447
(details.name, details.retry_num)
448
)
449
except Exception:
450
# An exception was raised inside the context (i.e. during the wait, not by the retry attempt)
451
conn.execute(
452
"UPDATE retry_events SET status='failed' WHERE name=? AND attempt=?",
453
(details.name, details.retry_num)
454
)
455
raise
456
finally:
457
self.pool.return_connection(conn)
458
459
timing_hook = TimingContextHook()
460
# db_hook = DatabaseContextHook(connection_pool)
461
set_on_retry_hooks([timing_hook])
462
```