or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced.md  crf-estimator.md  index.md  metrics.md  sklearn-integration.md  utils.md

docs/advanced.md

# Advanced Features

Advanced customization options for sklearn-crfsuite including custom trainer classes, specialized training workflows, and detailed training logging. These features are useful for users who need fine-grained control over the training process or want detailed insights into model convergence.

## Capabilities

### Custom Training with Detailed Logging

The LinePerIterationTrainer provides enhanced training output with detailed per-iteration statistics, making it easier to monitor training progress and diagnose convergence issues.

```python { .api }
class LinePerIterationTrainer:
    """
    Enhanced pycrfsuite.Trainer that prints detailed information about each
    training iteration on a single line with tabulated final results.

    This trainer extends the standard pycrfsuite.Trainer with:
    - Per-iteration progress logging
    - Detailed performance metrics during training
    - Final performance summary table
    """

    def on_iteration(self, log, info):
        """Called after each training iteration to display progress."""

    def on_optimization_end(self, log):
        """Called when training completes to display final results table."""
```

**Usage Example:**

```python
from sklearn_crfsuite import CRF
from sklearn_crfsuite.trainer import LinePerIterationTrainer

# Use custom trainer for detailed training logs
crf = CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=100,
    verbose=True,                        # Enable trainer output
    trainer_cls=LinePerIterationTrainer  # Use enhanced trainer
)

# Training will now show detailed per-iteration progress
crf.fit(X_train, y_train, X_dev=X_val, y_dev=y_val)

# Example output during training:
# Iter 1  time=0.05  loss=45.23  active=1250
# Iter 2  time=0.04  loss=38.17  active=1180
# Iter 3  time=0.04  loss=32.91  active=1150  precision=0.752  recall=0.698  F1=0.724  Acc(item/seq)=0.834 0.567
# ...
# =====================================
# Label    Precision  Recall  F1     Support
# B-PER    0.856      0.792   0.823  125
# I-PER    0.743      0.681   0.711  89
# B-LOC    0.912      0.878   0.895  156
# I-LOC    0.834      0.798   0.816  67
# O        0.945      0.967   0.956  1543
# -------------------------------------
```

### Training Progress Analysis

Access detailed training logs for analysis and debugging:

```python { .api }
@property
def training_log_:
    """
    Training log parser containing detailed iteration information.
    Available after model training completes.
    """
```

**Usage Example:**

```python
# Access training log after fitting
crf.fit(X_train, y_train)
training_log = crf.training_log_

# Analyze training progress
if training_log:
    last_iteration = training_log.last_iteration
    print(f"Final loss: {last_iteration.get('loss', 'N/A')}")
    print(f"Training time: {last_iteration.get('time', 'N/A')} seconds")
    print(f"Active features: {last_iteration.get('active_features', 'N/A')}")

    # Check if validation scores are available
    if 'scores' in last_iteration:
        print("\nFinal validation scores per label:")
        for label, score in last_iteration['scores'].items():
            print(f"{label}: P={score.precision:.3f} R={score.recall:.3f} F1={score.f1:.3f}")
```

### Custom Trainer Implementation

Create custom trainers for specialized training workflows:

**Usage Example:**

```python
import pycrfsuite
from sklearn_crfsuite.trainer import LinePerIterationTrainer

class CustomTrainer(LinePerIterationTrainer):
    """Custom trainer with additional logging and early stopping."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.iteration_losses = []
        self.early_stop_patience = 10
        self.early_stop_threshold = 0.001

    def on_iteration(self, log, info):
        # Store loss for early stopping analysis
        current_loss = info.get('loss', float('inf'))
        self.iteration_losses.append(current_loss)

        # Check for early stopping
        if len(self.iteration_losses) > self.early_stop_patience:
            recent_losses = self.iteration_losses[-self.early_stop_patience:]
            if max(recent_losses) - min(recent_losses) < self.early_stop_threshold:
                print(f"\nEarly stopping at iteration {info['num']} - loss converged")

        # Call parent method for standard logging
        super().on_iteration(log, info)

    def on_optimization_end(self, log):
        print(f"\nTraining completed with {len(self.iteration_losses)} iterations")
        print(f"Final loss: {self.iteration_losses[-1]:.4f}")
        super().on_optimization_end(log)

# Use custom trainer
crf = CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    trainer_cls=CustomTrainer,
    verbose=True
)
```

### Training Diagnostics

Advanced diagnostic functions for training analysis:

**Usage Example:**

```python
def analyze_training_convergence(crf):
    """Analyze training convergence patterns."""
    if not crf.training_log_:
        print("No training log available")
        return

    log = crf.training_log_
    iterations = []
    losses = []

    # Extract iteration data (this is conceptual - actual log structure may vary)
    for i in range(len(log.iterations)):
        iter_info = log.iterations[i]
        iterations.append(iter_info.get('num', i))
        losses.append(iter_info.get('loss', 0))

    # Analyze convergence
    if len(losses) > 10:
        early_loss = sum(losses[:5]) / 5
        late_loss = sum(losses[-5:]) / 5
        improvement = (early_loss - late_loss) / early_loss * 100
        print(f"Loss improvement: {improvement:.2f}%")

        # Check for overfitting indicators
        if len(losses) > 20:
            mid_loss = sum(losses[10:15]) / 5
            if late_loss > mid_loss:
                print("Warning: Possible overfitting detected")

# Usage
crf.fit(X_train, y_train, X_dev=X_val, y_dev=y_val)
analyze_training_convergence(crf)
```

### Algorithm-Specific Configuration

Advanced configuration for different training algorithms:

**Usage Example:**

```python
# L-BFGS with custom line search
crf_lbfgs = CRF(
    algorithm='lbfgs',
    linesearch='StrongBacktracking',  # More aggressive line search
    max_linesearch=50,                # More line search attempts
    num_memories=10,                  # More L-BFGS memories
    trainer_cls=LinePerIterationTrainer,
    verbose=True
)

# Stochastic gradient descent with calibration
crf_sgd = CRF(
    algorithm='l2sgd',
    calibration_eta=0.01,       # Lower initial learning rate
    calibration_rate=1.5,       # Slower learning rate adjustment
    calibration_samples=2000,   # More calibration samples
    trainer_cls=LinePerIterationTrainer,
    verbose=True
)

# Passive Aggressive with detailed monitoring
crf_pa = CRF(
    algorithm='pa',
    pa_type=2,              # PA-II variant
    c=0.1,                  # Lower aggressiveness
    error_sensitive=True,   # Include error count in objective
    trainer_cls=LinePerIterationTrainer,
    verbose=True
)
```