or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

audio-processing.md · batched-processing.md · core-speech-recognition.md · index.md · utilities.md · voice-activity-detection.md

docs/utilities.md

0

# Utilities

1

2

Helper functions for timestamp formatting, model information, logging, and other utility operations that support the main speech recognition functionality.

3

4

## Capabilities

5

6

### Timestamp Formatting

7

8

Format floating-point seconds into human-readable timestamp strings with customizable format options.

9

10

```python { .api }

11

def format_timestamp(

12

seconds: float,

13

always_include_hours: bool = False,

14

decimal_marker: str = "."

15

) -> str:

16

"""

17

Format seconds as timestamp string (HH:MM:SS.mmm or MM:SS.mmm).

18

19

Args:

20

seconds: Time in seconds (must be non-negative)

21

always_include_hours: If True, always include hours even if 0

22

decimal_marker: Character to use for decimal point (default: ".")

23

24

Returns:

25

Formatted timestamp string

26

27

Examples:

28

format_timestamp(65.123) -> "01:05.123"

29

format_timestamp(65.123, always_include_hours=True) -> "00:01:05.123"

30

format_timestamp(3661.5, decimal_marker=",") -> "01:01:01,500"

31

32

Raises:

33

AssertionError: If seconds is negative

34

"""

35

```

36

37

### Logging

38

39

Access the module logger for debugging and monitoring transcription operations.

40

41

```python { .api }

42

def get_logger():

43

"""

44

Get the faster_whisper module logger.

45

46

Returns:

47

logging.Logger: Logger instance for faster_whisper module

48

49

Usage:

50

logger = get_logger()

51

logger.info("Starting transcription")

52

logger.warning("Low audio quality detected")

53

"""

54

```

55

56

### Asset Path Management

57

58

Get the path to the package's assets directory, which contains model metadata and other resources.

59

60

```python { .api }

61

def get_assets_path() -> str:

62

"""

63

Get path to the faster_whisper assets directory.

64

65

Returns:

66

Absolute path to assets directory containing package resources

67

68

Notes:

69

- Contains model metadata and configuration files

70

- Used internally by the library for resource access

71

"""

72

```

73

74

### Internal Utilities

75

76

Additional utility functions used internally by the library.

77

78

```python { .api }

79

def get_end(segments: list[dict]) -> float | None:

80

"""

81

Get the end timestamp of the last segment with word-level timestamps.

82

83

Args:

84

segments: List of segment dictionaries with word timestamps

85

86

Returns:

87

End timestamp of last word, or last segment end time, or None if empty

88

89

Notes:

90

- Used internally for timestamp processing

91

- Prefers word-level timestamps over segment timestamps

92

"""

93

94

class disabled_tqdm(tqdm):

95

"""

96

Disabled tqdm progress bar for silent model downloads.

97

98

Used internally to suppress progress bars during model downloads

99

when progress display is not desired.

100

"""

101

def __init__(self, *args, **kwargs):

102

kwargs["disable"] = True

103

super().__init__(*args, **kwargs)

104

```

105

106

## Usage Examples

107

108

### Timestamp Formatting

109

110

```python

111

from faster_whisper import format_timestamp

112

113

# Basic formatting

114

print(format_timestamp(65.123)) # "01:05.123"

115

print(format_timestamp(3661.5)) # "01:01:01.500"

116

print(format_timestamp(12.7)) # "00:12.700"

117

118

# Always include hours

119

print(format_timestamp(65.123, always_include_hours=True)) # "00:01:05.123"

120

121

# Custom decimal marker for European formats

122

print(format_timestamp(65.123, decimal_marker=",")) # "01:05,123"

123

```

124

125

### Processing Transcription Results with Timestamps

126

127

```python

128

from faster_whisper import WhisperModel, format_timestamp

129

130

model = WhisperModel("base")

131

segments, info = model.transcribe("meeting.mp3", word_timestamps=True)

132

133

print(f"Meeting transcript - Duration: {format_timestamp(info.duration)}")

134

print("=" * 50)

135

136

for i, segment in enumerate(segments, 1):

137

start_ts = format_timestamp(segment.start)

138

end_ts = format_timestamp(segment.end)

139

140

print(f"Segment {i}: [{start_ts} -> {end_ts}]")

141

print(f" Text: {segment.text}")

142

print(f" Confidence: {segment.avg_logprob:.2f}")

143

144

if segment.words:

145

print(" Word timings:")

146

for word in segment.words:

147

word_start = format_timestamp(word.start)

148

word_end = format_timestamp(word.end)

149

print(f" {word.word} [{word_start}-{word_end}] (p={word.probability:.2f})")

150

print()

151

```

152

153

### Logging Configuration

154

155

```python

156

from faster_whisper import WhisperModel, get_logger

157

import logging

158

159

# Configure logging

160

logger = get_logger()

161

logger.setLevel(logging.INFO)

162

163

# Add custom handler

164

handler = logging.StreamHandler()

165

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

166

handler.setFormatter(formatter)

167

logger.addHandler(handler)

168

169

# Now faster_whisper operations will log information

170

model = WhisperModel("base")

171

segments, info = model.transcribe("audio.mp3") # Will show loading/processing logs

172

```

173

174

### Subtitle Generation

175

176

```python

177

from faster_whisper import WhisperModel, format_timestamp

178

179

def generate_srt_subtitles(audio_path, output_path, max_chars_per_line=50):

180

"""Generate SRT subtitle file from audio."""

181

model = WhisperModel("base")

182

segments, info = model.transcribe(audio_path, word_timestamps=True)

183

184

with open(output_path, 'w', encoding='utf-8') as f:

185

for i, segment in enumerate(segments, 1):

186

# Format timestamps for SRT (HH:MM:SS,mmm)

187

start_time = format_timestamp(segment.start,

188

always_include_hours=True,

189

decimal_marker=",")

190

end_time = format_timestamp(segment.end,

191

always_include_hours=True,

192

decimal_marker=",")

193

194

# Break long text into multiple lines

195

text = segment.text.strip()

196

if len(text) > max_chars_per_line:

197

# Simple word-based line breaking

198

words = text.split()

199

lines = []

200

current_line = ""

201

202

for word in words:

203

if len(current_line + " " + word) <= max_chars_per_line:

204

current_line += (" " + word if current_line else word)

205

else:

206

if current_line:

207

lines.append(current_line)

208

current_line = word

209

210

if current_line:

211

lines.append(current_line)

212

213

text = "\n".join(lines)

214

215

# Write SRT entry

216

f.write(f"{i}\n")

217

f.write(f"{start_time} --> {end_time}\n")

218

f.write(f"{text}\n\n")

219

220

print(f"Generated subtitles: {output_path}")

221

222

# Generate subtitles

223

generate_srt_subtitles("video.mp4", "subtitles.srt")

224

```

225

226

### Batch Processing with Progress Tracking

227

228

```python

229

from faster_whisper import WhisperModel, format_timestamp, get_logger

230

import os

231

import time

232

233

def process_audio_directory(directory_path, output_dir):

234

"""Process all audio files in directory with progress tracking."""

235

logger = get_logger()

236

model = WhisperModel("base")

237

238

# Find audio files

239

audio_extensions = {'.mp3', '.wav', '.m4a', '.flac', '.ogg'}

240

audio_files = []

241

242

for filename in os.listdir(directory_path):

243

if any(filename.lower().endswith(ext) for ext in audio_extensions):

244

audio_files.append(filename)

245

246

print(f"Found {len(audio_files)} audio files to process")

247

248

# Process each file

249

results = []

250

start_time = time.time()

251

252

for i, filename in enumerate(audio_files, 1):

253

file_path = os.path.join(directory_path, filename)

254

print(f"\n[{i}/{len(audio_files)}] Processing: {filename}")

255

256

try:

257

# Transcribe

258

file_start = time.time()

259

segments, info = model.transcribe(file_path)

260

processing_time = time.time() - file_start

261

262

# Collect results

263

transcript_text = " ".join(segment.text for segment in segments)

264

duration_str = format_timestamp(info.duration)

265

266

result = {

267

'filename': filename,

268

'duration': info.duration,

269

'language': info.language,

270

'confidence': info.language_probability,

271

'processing_time': processing_time,

272

'transcript': transcript_text

273

}

274

results.append(result)

275

276

print(f" Duration: {duration_str}")

277

print(f" Language: {info.language} (confidence: {info.language_probability:.2f})")

278

print(f" Processing time: {processing_time:.2f}s")

279

print(f" Speed: {info.duration/processing_time:.1f}x realtime")

280

281

# Save individual transcript

282

output_file = os.path.join(output_dir, f"{filename}.txt")

283

with open(output_file, 'w', encoding='utf-8') as f:

284

f.write(f"File: {filename}\n")

285

f.write(f"Duration: {duration_str}\n")

286

f.write(f"Language: {info.language}\n\n")

287

f.write(transcript_text)

288

289

except Exception as e:

290

logger.error(f"Error processing {filename}: {e}")

291

print(f" ERROR: {e}")

292

293

# Summary

294

total_time = time.time() - start_time

295

total_audio_duration = sum(r['duration'] for r in results)

296

297

print(f"\n" + "="*50)

298

print(f"Processing complete!")

299

print(f"Files processed: {len(results)}/{len(audio_files)}")

300

print(f"Total audio duration: {format_timestamp(total_audio_duration)}")

301

print(f"Total processing time: {format_timestamp(total_time)}")

302

print(f"Overall speed: {total_audio_duration/total_time:.1f}x realtime")

303

304

# Process directory

305

os.makedirs("transcripts", exist_ok=True)

306

process_audio_directory("audio_files", "transcripts")

307

```

308

309

## Version Information

310

311

The package version is available for programmatic access:

312

313

```python

314

from faster_whisper import __version__

315

print(f"faster-whisper version: {__version__}") # "1.2.0"

316

```

317

318

This can be useful for:

319

- Compatibility checking in applications

320

- Logging and debugging information

321

- Feature detection based on version

322

- Integration with package management systems