CRFsuite (python-crfsuite) wrapper which provides interface similar to scikit-learn
—
Advanced customization options for sklearn-crfsuite including custom trainer classes, specialized training workflows, and detailed training logging. These features are useful for users who need fine-grained control over the training process or want detailed insights into model convergence.
The LinePerIterationTrainer provides enhanced training output with detailed per-iteration statistics, making it easier to monitor training progress and diagnose convergence issues.
class LinePerIterationTrainer:
"""
Enhanced pycrfsuite.Trainer that prints detailed information about each
training iteration on a single line with tabulated final results.
This trainer extends the standard pycrfsuite.Trainer with:
- Per-iteration progress logging
- Detailed performance metrics during training
- Final performance summary table
"""
def on_iteration(self, log, info):
"""Called after each training iteration to display progress."""
def on_optimization_end(self, log):
"""Called when training completes to display final results table."""Usage Example:
from sklearn_crfsuite import CRF
from sklearn_crfsuite.trainer import LinePerIterationTrainer
# Use custom trainer for detailed training logs
crf = CRF(
algorithm='lbfgs',
c1=0.1,
c2=0.1,
max_iterations=100,
verbose=True, # Enable trainer output
trainer_cls=LinePerIterationTrainer # Use enhanced trainer
)
# Training will now show detailed per-iteration progress
crf.fit(X_train, y_train, X_dev=X_val, y_dev=y_val)
# Example output during training:
# Iter 1 time=0.05 loss=45.23 active=1250
# Iter 2 time=0.04 loss=38.17 active=1180
# Iter 3 time=0.04 loss=32.91 active=1150 precision=0.752 recall=0.698 F1=0.724 Acc(item/seq)=0.834 0.567
# ...
# =====================================
# Label Precision Recall F1 Support
# B-PER 0.856 0.792 0.823 125
# I-PER 0.743 0.681 0.711 89
# B-LOC 0.912 0.878 0.895 156
# I-LOC 0.834 0.798 0.816 67
# O 0.945 0.967 0.956 1543
# -------------------------------------

Access detailed training logs for analysis and debugging:
@property
def training_log_(self):
"""
Training log parser containing detailed iteration information.
Available after model training completes.
"""Usage Example:
# Access training log after fitting
crf.fit(X_train, y_train)
training_log = crf.training_log_
# Analyze training progress
if training_log:
last_iteration = training_log.last_iteration
print(f"Final loss: {last_iteration.get('loss', 'N/A')}")
print(f"Training time: {last_iteration.get('time', 'N/A')} seconds")
print(f"Active features: {last_iteration.get('active_features', 'N/A')}")
# Check if validation scores are available
if 'scores' in last_iteration:
print("\nFinal validation scores per label:")
for label, score in last_iteration['scores'].items():
print(f"{label}: P={score.precision:.3f} R={score.recall:.3f} F1={score.f1:.3f}")Create custom trainers for specialized training workflows:
Usage Example:
import pycrfsuite
from sklearn_crfsuite.trainer import LinePerIterationTrainer
class CustomTrainer(LinePerIterationTrainer):
"""Custom trainer with additional logging and early stopping."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.iteration_losses = []
self.early_stop_patience = 10
self.early_stop_threshold = 0.001
def on_iteration(self, log, info):
# Store loss for early stopping analysis
current_loss = info.get('loss', float('inf'))
self.iteration_losses.append(current_loss)
# Check for early stopping
if len(self.iteration_losses) > self.early_stop_patience:
recent_losses = self.iteration_losses[-self.early_stop_patience:]
if max(recent_losses) - min(recent_losses) < self.early_stop_threshold:
print(f"\\nEarly stopping at iteration {info['num']} - loss converged")
# Call parent method for standard logging
super().on_iteration(log, info)
def on_optimization_end(self, log):
print(f"\\nTraining completed with {len(self.iteration_losses)} iterations")
print(f"Final loss: {self.iteration_losses[-1]:.4f}")
super().on_optimization_end(log)
# Use custom trainer
crf = CRF(
algorithm='lbfgs',
c1=0.1,
c2=0.1,
trainer_cls=CustomTrainer,
verbose=True
)

Advanced diagnostic functions for training analysis:
Usage Example:
def analyze_training_convergence(crf):
"""Analyze training convergence patterns."""
if not crf.training_log_:
print("No training log available")
return
log = crf.training_log_
iterations = []
losses = []
# Extract iteration data (this is conceptual - actual log structure may vary)
for i in range(len(log.iterations)):
iter_info = log.iterations[i]
iterations.append(iter_info.get('num', i))
losses.append(iter_info.get('loss', 0))
# Analyze convergence
if len(losses) > 10:
early_loss = sum(losses[:5]) / 5
late_loss = sum(losses[-5:]) / 5
improvement = (early_loss - late_loss) / early_loss * 100
print(f"Loss improvement: {improvement:.2f}%")
# Check for overfitting indicators
if len(losses) > 20:
mid_loss = sum(losses[10:15]) / 5
if late_loss > mid_loss:
print("Warning: Possible overfitting detected")
# Usage
crf.fit(X_train, y_train, X_dev=X_val, y_dev=y_val)
analyze_training_convergence(crf)

Advanced configuration for different training algorithms:
Usage Example:
# L-BFGS with custom line search
crf_lbfgs = CRF(
algorithm='lbfgs',
linesearch='StrongBacktracking', # More aggressive line search
max_linesearch=50, # More line search attempts
num_memories=10, # More L-BFGS memories
trainer_cls=LinePerIterationTrainer,
verbose=True
)
# Stochastic gradient descent with calibration
crf_sgd = CRF(
algorithm='l2sgd',
calibration_eta=0.01, # Lower initial learning rate
calibration_rate=1.5, # Slower learning rate adjustment
calibration_samples=2000, # More calibration samples
trainer_cls=LinePerIterationTrainer,
verbose=True
)
# Passive Aggressive with detailed monitoring
crf_pa = CRF(
algorithm='pa',
pa_type=2, # PA-II variant
c=0.1, # Lower aggressiveness
error_sensitive=True, # Include error count in objective
trainer_cls=LinePerIterationTrainer,
verbose=True
)

Install with Tessl CLI
npx tessl i tessl/pypi-sklearn-crfsuite