
# Module Discovery

Tools for discovering, listing, and inspecting available evaluation modules from the Hugging Face Hub and local sources. These functions help users explore the ecosystem of available metrics, comparisons, and measurements.
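
The typical flow is to list what is available and then copy a module locally to read its source. A minimal sketch using the two functions documented below:

```python
import evaluate

# Discover which metrics are available on the Hub
metric_ids = evaluate.list_evaluation_modules(module_type="metric")
print(f"{len(metric_ids)} metrics available")

# Copy one of them locally to read or modify its implementation
evaluate.inspect_evaluation_module(path="accuracy", local_path="./accuracy_source")
```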

## Capabilities

### List Available Modules

Discover all available evaluation modules on the Hugging Face Hub:

```python { .api }
def list_evaluation_modules(
    module_type: Optional[str] = None,
    include_community: bool = True,
    with_details: bool = False
) -> List[Union[str, Dict[str, Any]]]:
    """List all evaluation modules available on the Hugging Face Hub.

    Args:
        module_type: Type filter ('metric', 'comparison', 'measurement', or None for all)
        include_community: Whether to include community-contributed modules
        with_details: Return full metadata dicts instead of just module ID strings

    Returns:
        List of module IDs (strings) when with_details=False, or
        list of metadata dictionaries when with_details=True
    """
```

**Parameters:**

- `module_type`: Filter by type ("metric", "comparison", "measurement") or None for all
- `include_community`: Whether to include community-contributed modules
- `with_details`: Whether to include detailed metadata for each module

**Usage Example:**

```python
import evaluate

# List all available evaluation modules
all_modules = evaluate.list_evaluation_modules()
print(f"Found {len(all_modules)} evaluation modules")

# List only metrics
metrics = evaluate.list_evaluation_modules(module_type="metric")
print(f"Available metrics: {len(metrics)}")

# List with detailed information
detailed_metrics = evaluate.list_evaluation_modules(
    module_type="metric",
    with_details=True
)

for metric in detailed_metrics[:5]:  # Show first 5
    print(f"- {metric['id']}: {metric.get('description', 'No description')}")
```

**Example Output:**

```python
# Basic listing (with_details=False) returns module ID strings
[
    'accuracy',
    'bleu',
    'rouge',
    'f1',
    # ... more modules
]

# Detailed listing (with_details=True) returns metadata dictionaries
[
    {
        'id': 'accuracy',
        'description': 'Computes the accuracy classification score.',
        'tags': ['evaluation', 'metric'],
        'downloads': 50000,
        # ... additional metadata
    },
    # ... more detailed entries
]
```
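
Because the return shape depends on `with_details`, downstream code sometimes needs to handle both forms. A minimal sketch of a normalizing helper (the `module_ids` name is illustrative, not part of the library), assuming the `'id'` key shown in the detailed listing above:

```python
import evaluate

def module_ids(modules):
    """Normalize list_evaluation_modules() output to plain ID strings."""
    # with_details=False yields strings; with_details=True yields metadata dicts
    return [m["id"] if isinstance(m, dict) else m for m in modules]

# Works the same way for both call styles
print(module_ids(evaluate.list_evaluation_modules(module_type="metric"))[:5])
print(module_ids(evaluate.list_evaluation_modules(module_type="metric", with_details=True))[:5])
```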

**Filter by Module Type:**

```python
import evaluate

# Get only comparison modules (returned as module ID strings)
comparisons = evaluate.list_evaluation_modules(module_type="comparison")
print("Available comparisons:", comparisons)

# Get only measurement modules
measurements = evaluate.list_evaluation_modules(module_type="measurement")
print("Available measurements:", measurements)

# Include only official modules (exclude community)
official_metrics = evaluate.list_evaluation_modules(
    module_type="metric",
    include_community=False
)
```
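
The same filter can be applied per type to get a quick census of what is on the Hub; a small sketch built only on the documented `module_type` values:

```python
import evaluate

# Count modules per documented type ("metric", "comparison", "measurement")
modules_by_type = {
    kind: evaluate.list_evaluation_modules(module_type=kind)
    for kind in ("metric", "comparison", "measurement")
}

for kind, ids in modules_by_type.items():
    print(f"{kind}: {len(ids)} available, e.g. {ids[:3]}")
```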

### Inspect Evaluation Modules

Copy evaluation modules to local directories for inspection and modification:

```python { .api }
def inspect_evaluation_module(
    path: str,
    local_path: str,
    download_config: Optional[DownloadConfig] = None,
    **download_kwargs
) -> None:
    """Copy an evaluation module locally for inspection and modification.

    Args:
        path: Path to evaluation module - can be a Hub module name
            (e.g., 'accuracy') or a local path to a module
        local_path: Local directory path where the module will be copied
        download_config: Configuration for downloading from the Hub (optional)
        **download_kwargs: Additional download parameters
    """
```

**Parameters:**

- `path`: Hub module name or path to local module
- `local_path`: Local directory where module will be copied
- `download_config`: Configuration for downloading from Hub
- `**download_kwargs`: Additional download parameters

**Usage Example:**

```python
import evaluate
import os

# Inspect a metric from the Hub
evaluate.inspect_evaluation_module(
    path="accuracy",
    local_path="./inspected_accuracy"
)

# Check what was downloaded
print("Inspected files:")
for root, dirs, files in os.walk("./inspected_accuracy"):
    for file in files:
        print(f"- {os.path.join(root, file)}")

# Now you can examine and modify the module
with open("./inspected_accuracy/accuracy.py", "r") as f:
    print("Module source:")
    print(f.read()[:500] + "...")
```

**Inspect Community Module:**

```python
import evaluate

# Inspect a community-contributed module
evaluate.inspect_evaluation_module(
    path="username/custom-metric",
    local_path="./custom_metric_inspection"
)

# Inspect with specific configuration
from datasets import DownloadConfig

config = DownloadConfig(
    cache_dir="./custom_cache",
    force_download=True
)

evaluate.inspect_evaluation_module(
    path="bleu",
    local_path="./bleu_source",
    download_config=config
)
```

174

175

**Modify and Use Inspected Module:**

176

```python

177

import evaluate

178

179

# First inspect the module

180

evaluate.inspect_evaluation_module(

181

path="f1",

182

local_path="./my_f1_variant"

183

)

184

185

# Modify the local copy (edit files as needed)

186

# ... make changes to ./my_f1_variant/f1.py ...

187

188

# Load your modified version

189

custom_f1 = evaluate.load("./my_f1_variant")

190

191

# Use the modified metric

192

result = custom_f1.compute(

193

predictions=[1, 0, 1, 0],

194

references=[1, 1, 0, 0]

195

)

196

print(result)

197

```
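
One way to sanity-check the modified copy is to run it side by side with the original Hub module on the same inputs. A minimal sketch, assuming the local variant keeps the standard `compute` interface:

```python
import evaluate

# Load the original Hub metric and the locally modified copy
original_f1 = evaluate.load("f1")
custom_f1 = evaluate.load("./my_f1_variant")

predictions = [1, 0, 1, 0]
references = [1, 1, 0, 0]

# Identical inputs make the effect of the local edits visible
print("original:", original_f1.compute(predictions=predictions, references=references))
print("modified:", custom_f1.compute(predictions=predictions, references=references))
```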


## Discovery Workflows

**Explore Available Metrics for a Task:**

```python
import evaluate

# Get all metrics and filter by name patterns
all_modules = evaluate.list_evaluation_modules(with_details=True)

# Find text-related metrics
text_metrics = [
    module for module in all_modules
    if any(keyword in module['id'].lower()
           for keyword in ['bleu', 'rouge', 'bertscore', 'meteor'])
]

print("Text generation metrics:")
for metric in text_metrics:
    print(f"- {metric['id']}: {metric.get('description', '')}")

# Find classification metrics
classification_metrics = [
    module for module in all_modules
    if any(keyword in module['id'].lower()
           for keyword in ['accuracy', 'f1', 'precision', 'recall'])
]

print("\nClassification metrics:")
for metric in classification_metrics:
    print(f"- {metric['id']}: {metric.get('description', '')}")
```

**Research and Development Workflow:**

```python
import evaluate

# 1. Discover what's available
metrics = evaluate.list_evaluation_modules(
    module_type="metric",
    with_details=True
)

# 2. Find metrics of interest
nlp_metrics = [m for m in metrics if 'nlp' in str(m.get('tags', [])).lower()]

# 3. Inspect implementation details
for metric in nlp_metrics[:3]:  # Inspect first 3
    print(f"Inspecting {metric['id']}...")
    evaluate.inspect_evaluation_module(
        path=metric['id'],
        local_path=f"./inspected_{metric['id']}"
    )

# 4. Load and test metrics
for metric in nlp_metrics[:3]:
    try:
        loaded_metric = evaluate.load(metric['id'])
        print(f"✓ Successfully loaded {metric['id']}")
        print(f"  Description: {loaded_metric.description}")
    except Exception as e:
        print(f"✗ Failed to load {metric['id']}: {e}")
```

**Compare Module Versions:**

```python
import evaluate

# Inspect different versions of a module
versions = ["v1.0.0", "v1.1.0", "main"]

for version in versions:
    # revision is forwarded to the download via **download_kwargs
    evaluate.inspect_evaluation_module(
        path="bleu",
        local_path=f"./bleu_{version}",
        revision=version
    )
    print(f"Inspected BLEU {version}")

# Now compare the implementations, manually or with the diff sketch below
# ... examine differences between versions ...
```
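
For a programmatic comparison rather than a manual read-through, the inspected copies can be diffed directly; a sketch using the standard library's `difflib`, assuming each inspected directory contains a `bleu.py` script as in the layout shown earlier:

```python
import difflib
from pathlib import Path

# Diff the main module script between two inspected revisions
old = Path("./bleu_v1.0.0/bleu.py").read_text().splitlines()
new = Path("./bleu_main/bleu.py").read_text().splitlines()

for line in difflib.unified_diff(old, new, fromfile="bleu v1.0.0", tofile="bleu main", lineterm=""):
    print(line)
```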

## Error Handling

Module discovery functions may raise:

- `ConnectionError`: Network issues accessing the Hub
- `FileNotFoundError`: Module not found on Hub or locally
- `PermissionError`: Cannot write to local inspection directory
- `ValueError`: Invalid module type or configuration

**Example:**

```python
import evaluate

try:
    modules = evaluate.list_evaluation_modules()
except ConnectionError:
    print("Cannot connect to Hub - working offline")
    modules = []

try:
    evaluate.inspect_evaluation_module(
        path="nonexistent-metric",
        local_path="./test"
    )
except FileNotFoundError:
    print("Metric not found on Hub")

try:
    evaluate.inspect_evaluation_module(
        path="accuracy",
        local_path="/root/protected"  # May not have write access
    )
except PermissionError:
    print("Cannot write to protected directory")
```
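
The `ValueError` case from the list above can be handled the same way; a short sketch, assuming an unrecognized `module_type` value is rejected rather than silently ignored:

```python
import evaluate

try:
    # "translation" is not one of the documented types
    # ("metric", "comparison", "measurement")
    evaluate.list_evaluation_modules(module_type="translation")
except ValueError:
    print("Invalid module type - use 'metric', 'comparison', or 'measurement'")
```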