# Advanced Features

Advanced customization options for sklearn-crfsuite including custom trainer classes, specialized training workflows, and detailed training logging. These features are useful for users who need fine-grained control over the training process or want detailed insights into model convergence.

## Capabilities

### Custom Training with Detailed Logging

The LinePerIterationTrainer provides enhanced training output with detailed per-iteration statistics, making it easier to monitor training progress and diagnose convergence issues.

```python { .api }
class LinePerIterationTrainer:
    """
    Enhanced pycrfsuite.Trainer that prints detailed information about each
    training iteration on a single line with tabulated final results.

    This trainer extends the standard pycrfsuite.Trainer with:
    - Per-iteration progress logging
    - Detailed performance metrics during training
    - Final performance summary table
    """

    def on_iteration(self, log, info):
        """Called after each training iteration to display progress."""

    def on_optimization_end(self, log):
        """Called when training completes to display final results table."""
```
**Usage Example:**

```python
from sklearn_crfsuite import CRF
from sklearn_crfsuite.trainer import LinePerIterationTrainer

# Use custom trainer for detailed training logs
crf = CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=100,
    verbose=True,                        # Enable trainer output
    trainer_cls=LinePerIterationTrainer  # Use enhanced trainer
)

# Training will now show detailed per-iteration progress
crf.fit(X_train, y_train, X_dev=X_val, y_dev=y_val)

# Example output during training:
# Iter 1 time=0.05 loss=45.23 active=1250
# Iter 2 time=0.04 loss=38.17 active=1180
# Iter 3 time=0.04 loss=32.91 active=1150 precision=0.752 recall=0.698 F1=0.724 Acc(item/seq)=0.834 0.567
# ...
# =====================================
# Label  Precision  Recall  F1     Support
# B-PER  0.856      0.792   0.823  125
# I-PER  0.743      0.681   0.711  89
# B-LOC  0.912      0.878   0.895  156
# I-LOC  0.834      0.798   0.816  67
# O      0.945      0.967   0.956  1543
# -------------------------------------
```
### Training Progress Analysis

Access detailed training logs for analysis and debugging:

```python { .api }
@property
def training_log_(self):
    """
    Training log parser containing detailed iteration information.
    Available after model training completes.
    """
```

**Usage Example:**

```python
# Access training log after fitting
crf.fit(X_train, y_train)
training_log = crf.training_log_

# Analyze training progress
if training_log:
    last_iteration = training_log.last_iteration
    print(f"Final loss: {last_iteration.get('loss', 'N/A')}")
    print(f"Training time: {last_iteration.get('time', 'N/A')} seconds")
    print(f"Active features: {last_iteration.get('active_features', 'N/A')}")

    # Check if validation scores are available
    if 'scores' in last_iteration:
        print("\nFinal validation scores per label:")
        for label, score in last_iteration['scores'].items():
            print(f"{label}: P={score.precision:.3f} R={score.recall:.3f} F1={score.f1:.3f}")
```
### Custom Trainer Implementation

Create custom trainers for specialized training workflows:

**Usage Example:**

```python
import pycrfsuite
from sklearn_crfsuite.trainer import LinePerIterationTrainer

class CustomTrainer(LinePerIterationTrainer):
    """Custom trainer with additional logging and early stopping."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.iteration_losses = []
        self.early_stop_patience = 10
        self.early_stop_threshold = 0.001

    def on_iteration(self, log, info):
        # Store loss for early stopping analysis
        current_loss = info.get('loss', float('inf'))
        self.iteration_losses.append(current_loss)

        # Check for early stopping
        if len(self.iteration_losses) > self.early_stop_patience:
            recent_losses = self.iteration_losses[-self.early_stop_patience:]
            if max(recent_losses) - min(recent_losses) < self.early_stop_threshold:
                print(f"\nEarly stopping at iteration {info['num']} - loss converged")

        # Call parent method for standard logging
        super().on_iteration(log, info)

    def on_optimization_end(self, log):
        print(f"\nTraining completed with {len(self.iteration_losses)} iterations")
        print(f"Final loss: {self.iteration_losses[-1]:.4f}")
        super().on_optimization_end(log)

# Use custom trainer
crf = CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    trainer_cls=CustomTrainer,
    verbose=True
)
```
### Training Diagnostics

Advanced diagnostic functions for training analysis:

**Usage Example:**

```python
def analyze_training_convergence(crf):
    """Analyze training convergence patterns."""
    if not crf.training_log_:
        print("No training log available")
        return

    log = crf.training_log_
    iterations = []
    losses = []

    # Extract iteration data (this is conceptual - actual log structure may vary)
    for i in range(len(log.iterations)):
        iter_info = log.iterations[i]
        iterations.append(iter_info.get('num', i))
        losses.append(iter_info.get('loss', 0))

    # Analyze convergence
    if len(losses) > 10:
        early_loss = sum(losses[:5]) / 5
        late_loss = sum(losses[-5:]) / 5
        improvement = (early_loss - late_loss) / early_loss * 100
        print(f"Loss improvement: {improvement:.2f}%")

        # Check for overfitting indicators
        if len(losses) > 20:
            mid_loss = sum(losses[10:15]) / 5
            if late_loss > mid_loss:
                print("Warning: Possible overfitting detected")

# Usage
crf.fit(X_train, y_train, X_dev=X_val, y_dev=y_val)
analyze_training_convergence(crf)
```
### Algorithm-Specific Configuration

Advanced configuration for different training algorithms:

**Usage Example:**

```python
# L-BFGS with custom line search
crf_lbfgs = CRF(
    algorithm='lbfgs',
    linesearch='StrongBacktracking',  # More aggressive line search
    max_linesearch=50,                # More line search attempts
    num_memories=10,                  # More L-BFGS memories
    trainer_cls=LinePerIterationTrainer,
    verbose=True
)

# Stochastic gradient descent with calibration
crf_sgd = CRF(
    algorithm='l2sgd',
    calibration_eta=0.01,       # Lower initial learning rate
    calibration_rate=1.5,       # Slower learning rate adjustment
    calibration_samples=2000,   # More calibration samples
    trainer_cls=LinePerIterationTrainer,
    verbose=True
)

# Passive Aggressive with detailed monitoring
crf_pa = CRF(
    algorithm='pa',
    pa_type=2,             # PA-II variant
    c=0.1,                 # Lower aggressiveness
    error_sensitive=True,  # Include error count in objective
    trainer_cls=LinePerIterationTrainer,
    verbose=True
)
```