# Advanced Features

Advanced customization options for sklearn-crfsuite including custom trainer classes, specialized training workflows, and detailed training logging. These features are useful for users who need fine-grained control over the training process or want detailed insights into model convergence.

## Capabilities

### Custom Training with Detailed Logging

The LinePerIterationTrainer provides enhanced training output with detailed per-iteration statistics, making it easier to monitor training progress and diagnose convergence issues.

```python { .api }
class LinePerIterationTrainer:
    """
    Enhanced pycrfsuite.Trainer that prints detailed information about each
    training iteration on a single line with tabulated final results.

    This trainer extends the standard pycrfsuite.Trainer with:
    - Per-iteration progress logging
    - Detailed performance metrics during training
    - Final performance summary table
    """

    def on_iteration(self, log, info):
        """Called after each training iteration to display progress."""

    def on_optimization_end(self, log):
        """Called when training completes to display final results table."""
```
**Usage Example:**

```python
from sklearn_crfsuite import CRF
from sklearn_crfsuite.trainer import LinePerIterationTrainer

# Use custom trainer for detailed training logs
crf = CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=100,
    verbose=True,                        # Enable trainer output
    trainer_cls=LinePerIterationTrainer  # Use enhanced trainer
)

# Training will now show detailed per-iteration progress
crf.fit(X_train, y_train, X_dev=X_val, y_dev=y_val)

# Example output during training:
# Iter 1 time=0.05 loss=45.23 active=1250
# Iter 2 time=0.04 loss=38.17 active=1180
# Iter 3 time=0.04 loss=32.91 active=1150 precision=0.752 recall=0.698 F1=0.724 Acc(item/seq)=0.834 0.567
# ...
# =====================================
# Label  Precision  Recall  F1     Support
# B-PER  0.856      0.792   0.823  125
# I-PER  0.743      0.681   0.711  89
# B-LOC  0.912      0.878   0.895  156
# I-LOC  0.834      0.798   0.816  67
# O      0.945      0.967   0.956  1543
# -------------------------------------
```
### Training Progress Analysis

Access detailed training logs for analysis and debugging:

```python { .api }
@property
def training_log_(self):
    """
    Training log parser containing detailed iteration information.
    Available after model training completes.
    """
```

**Usage Example:**

```python
# Access training log after fitting
crf.fit(X_train, y_train)
training_log = crf.training_log_

# Analyze training progress
if training_log:
    last_iteration = training_log.last_iteration
    print(f"Final loss: {last_iteration.get('loss', 'N/A')}")
    print(f"Training time: {last_iteration.get('time', 'N/A')} seconds")
    print(f"Active features: {last_iteration.get('active_features', 'N/A')}")

    # Check if validation scores are available
    if 'scores' in last_iteration:
        print("\nFinal validation scores per label:")
        for label, score in last_iteration['scores'].items():
            print(f"{label}: P={score.precision:.3f} R={score.recall:.3f} F1={score.f1:.3f}")
```
### Custom Trainer Implementation

Create custom trainers for specialized training workflows:

**Usage Example:**

```python
import pycrfsuite
from sklearn_crfsuite.trainer import LinePerIterationTrainer

class CustomTrainer(LinePerIterationTrainer):
    """Custom trainer with additional logging and early stopping."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.iteration_losses = []
        self.early_stop_patience = 10
        self.early_stop_threshold = 0.001

    def on_iteration(self, log, info):
        # Store loss for early stopping analysis
        current_loss = info.get('loss', float('inf'))
        self.iteration_losses.append(current_loss)

        # Check for early stopping
        if len(self.iteration_losses) > self.early_stop_patience:
            recent_losses = self.iteration_losses[-self.early_stop_patience:]
            if max(recent_losses) - min(recent_losses) < self.early_stop_threshold:
                print(f"\nEarly stopping at iteration {info['num']} - loss converged")

        # Call parent method for standard logging
        super().on_iteration(log, info)

    def on_optimization_end(self, log):
        print(f"\nTraining completed with {len(self.iteration_losses)} iterations")
        print(f"Final loss: {self.iteration_losses[-1]:.4f}")
        super().on_optimization_end(log)

# Use custom trainer
crf = CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    trainer_cls=CustomTrainer,
    verbose=True
)
```
### Training Diagnostics

Advanced diagnostic functions for training analysis:

**Usage Example:**

```python
def analyze_training_convergence(crf):
    """Analyze training convergence patterns."""
    if not crf.training_log_:
        print("No training log available")
        return

    log = crf.training_log_
    iterations = []
    losses = []

    # Extract iteration data (this is conceptual - actual log structure may vary)
    for i in range(len(log.iterations)):
        iter_info = log.iterations[i]
        iterations.append(iter_info.get('num', i))
        losses.append(iter_info.get('loss', 0))

    # Analyze convergence
    if len(losses) > 10:
        early_loss = sum(losses[:5]) / 5
        late_loss = sum(losses[-5:]) / 5
        improvement = (early_loss - late_loss) / early_loss * 100
        print(f"Loss improvement: {improvement:.2f}%")

        # Check for overfitting indicators
        if len(losses) > 20:
            mid_loss = sum(losses[10:15]) / 5
            if late_loss > mid_loss:
                print("Warning: Possible overfitting detected")

# Usage
crf.fit(X_train, y_train, X_dev=X_val, y_dev=y_val)
analyze_training_convergence(crf)
```
### Algorithm-Specific Configuration

Advanced configuration for different training algorithms:

**Usage Example:**

```python
# L-BFGS with custom line search
crf_lbfgs = CRF(
    algorithm='lbfgs',
    linesearch='StrongBacktracking',  # More aggressive line search
    max_linesearch=50,                # More line search attempts
    num_memories=10,                  # More L-BFGS memories
    trainer_cls=LinePerIterationTrainer,
    verbose=True
)

# Stochastic gradient descent with calibration
crf_sgd = CRF(
    algorithm='l2sgd',
    calibration_eta=0.01,       # Lower initial learning rate
    calibration_rate=1.5,       # Slower learning rate adjustment
    calibration_samples=2000,   # More calibration samples
    trainer_cls=LinePerIterationTrainer,
    verbose=True
)

# Passive Aggressive with detailed monitoring
crf_pa = CRF(
    algorithm='pa',
    pa_type=2,             # PA-II variant
    c=0.1,                 # Lower aggressiveness
    error_sensitive=True,  # Include error count in objective
    trainer_cls=LinePerIterationTrainer,
    verbose=True
)
```