# Utilities

Helper functions for timestamp formatting, model information, logging, and other utility operations that support the main speech recognition functionality.

## Capabilities

### Timestamp Formatting

Format floating-point seconds into human-readable timestamp strings with customizable format options.

```python { .api }
def format_timestamp(
    seconds: float,
    always_include_hours: bool = False,
    decimal_marker: str = "."
) -> str:
    """
    Format seconds as timestamp string (HH:MM:SS.mmm or MM:SS.mmm).

    Args:
        seconds: Time in seconds (must be non-negative)
        always_include_hours: If True, always include hours even if 0
        decimal_marker: Character to use for decimal point (default: ".")

    Returns:
        Formatted timestamp string

    Examples:
        format_timestamp(65.123) -> "01:05.123"
        format_timestamp(65.123, always_include_hours=True) -> "00:01:05.123"
        format_timestamp(3661.5, decimal_marker=",") -> "01:01:01,500"

    Raises:
        AssertionError: If seconds is negative
    """
```
### Logging

Access the module logger for debugging and monitoring transcription operations.

```python { .api }
def get_logger():
    """
    Get the faster_whisper module logger.

    Returns:
        logging.Logger: Logger instance for faster_whisper module

    Usage:
        logger = get_logger()
        logger.info("Starting transcription")
        logger.warning("Low audio quality detected")
    """
```
### Asset Path Management

Get the path to package assets directory containing model metadata and other resources.

```python { .api }
def get_assets_path() -> str:
    """
    Get path to the faster_whisper assets directory.

    Returns:
        Absolute path to assets directory containing package resources

    Notes:
        - Contains model metadata and configuration files
        - Used internally by the library for resource access
    """
```
### Internal Utilities

Additional utility functions used internally by the library.

```python { .api }
def get_end(segments: list[dict]) -> float | None:
    """
    Get the end timestamp of the last segment with word-level timestamps.

    Args:
        segments: List of segment dictionaries with word timestamps

    Returns:
        End timestamp of last word, or last segment end time, or None if empty

    Notes:
        - Used internally for timestamp processing
        - Prefers word-level timestamps over segment timestamps
    """

class disabled_tqdm(tqdm):
    """
    Disabled tqdm progress bar for silent model downloads.

    Used internally to suppress progress bars during model downloads
    when progress display is not desired.
    """
    def __init__(self, *args, **kwargs):
        kwargs["disable"] = True
        super().__init__(*args, **kwargs)
```
## Usage Examples

### Timestamp Formatting

```python
from faster_whisper import format_timestamp

# Basic formatting
print(format_timestamp(65.123))    # "01:05.123"
print(format_timestamp(3661.5))    # "01:01:01.500"
print(format_timestamp(12.7))      # "00:12.700"

# Always include hours
print(format_timestamp(65.123, always_include_hours=True))  # "00:01:05.123"

# Custom decimal marker for European formats
print(format_timestamp(65.123, decimal_marker=","))  # "01:05,123"
```
### Processing Transcription Results with Timestamps

```python
from faster_whisper import WhisperModel, format_timestamp

model = WhisperModel("base")
segments, info = model.transcribe("meeting.mp3", word_timestamps=True)

print(f"Meeting transcript - Duration: {format_timestamp(info.duration)}")
print("=" * 50)

for i, segment in enumerate(segments, 1):
    start_ts = format_timestamp(segment.start)
    end_ts = format_timestamp(segment.end)

    print(f"Segment {i}: [{start_ts} -> {end_ts}]")
    print(f"  Text: {segment.text}")
    print(f"  Confidence: {segment.avg_logprob:.2f}")

    if segment.words:
        print("  Word timings:")
        for word in segment.words:
            word_start = format_timestamp(word.start)
            word_end = format_timestamp(word.end)
            print(f"    {word.word} [{word_start}-{word_end}] (p={word.probability:.2f})")
    print()
```
### Logging Configuration

```python
from faster_whisper import WhisperModel, get_logger
import logging

# Configure logging
logger = get_logger()
logger.setLevel(logging.INFO)

# Add custom handler
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

# Now faster_whisper operations will log information
model = WhisperModel("base")
segments, info = model.transcribe("audio.mp3")  # Will show loading/processing logs
```
### Subtitle Generation

```python
from faster_whisper import WhisperModel, format_timestamp

def generate_srt_subtitles(audio_path, output_path, max_chars_per_line=50):
    """Generate SRT subtitle file from audio."""
    model = WhisperModel("base")
    segments, info = model.transcribe(audio_path, word_timestamps=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        for i, segment in enumerate(segments, 1):
            # Format timestamps for SRT (HH:MM:SS,mmm)
            start_time = format_timestamp(segment.start,
                                          always_include_hours=True,
                                          decimal_marker=",")
            end_time = format_timestamp(segment.end,
                                        always_include_hours=True,
                                        decimal_marker=",")

            # Break long text into multiple lines
            text = segment.text.strip()
            if len(text) > max_chars_per_line:
                # Simple word-based line breaking
                words = text.split()
                lines = []
                current_line = ""

                for word in words:
                    if len(current_line + " " + word) <= max_chars_per_line:
                        current_line += (" " + word if current_line else word)
                    else:
                        if current_line:
                            lines.append(current_line)
                        current_line = word

                if current_line:
                    lines.append(current_line)

                text = "\n".join(lines)

            # Write SRT entry
            f.write(f"{i}\n")
            f.write(f"{start_time} --> {end_time}\n")
            f.write(f"{text}\n\n")

    print(f"Generated subtitles: {output_path}")

# Generate subtitles
generate_srt_subtitles("video.mp4", "subtitles.srt")
```
### Batch Processing with Progress Tracking

```python
from faster_whisper import WhisperModel, format_timestamp, get_logger
import os
import time

def process_audio_directory(directory_path, output_dir):
    """Process all audio files in directory with progress tracking."""
    logger = get_logger()
    model = WhisperModel("base")

    # Find audio files
    audio_extensions = {'.mp3', '.wav', '.m4a', '.flac', '.ogg'}
    audio_files = []

    for filename in os.listdir(directory_path):
        if any(filename.lower().endswith(ext) for ext in audio_extensions):
            audio_files.append(filename)

    print(f"Found {len(audio_files)} audio files to process")

    # Process each file
    results = []
    start_time = time.time()

    for i, filename in enumerate(audio_files, 1):
        file_path = os.path.join(directory_path, filename)
        print(f"\n[{i}/{len(audio_files)}] Processing: {filename}")

        try:
            # Transcribe
            file_start = time.time()
            segments, info = model.transcribe(file_path)
            processing_time = time.time() - file_start

            # Collect results
            transcript_text = " ".join(segment.text for segment in segments)
            duration_str = format_timestamp(info.duration)

            result = {
                'filename': filename,
                'duration': info.duration,
                'language': info.language,
                'confidence': info.language_probability,
                'processing_time': processing_time,
                'transcript': transcript_text
            }
            results.append(result)

            print(f"  Duration: {duration_str}")
            print(f"  Language: {info.language} (confidence: {info.language_probability:.2f})")
            print(f"  Processing time: {processing_time:.2f}s")
            print(f"  Speed: {info.duration/processing_time:.1f}x realtime")

            # Save individual transcript
            output_file = os.path.join(output_dir, f"{os.path.splitext(filename)[0]}.txt")
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(f"File: {filename}\n")
                f.write(f"Duration: {duration_str}\n")
                f.write(f"Language: {info.language}\n\n")
                f.write(transcript_text)

        except Exception as e:
            logger.error(f"Error processing {filename}: {e}")
            print(f"  ERROR: {e}")

    # Summary
    total_time = time.time() - start_time
    total_audio_duration = sum(r['duration'] for r in results)

    print(f"\n" + "="*50)
    print(f"Processing complete!")
    print(f"Files processed: {len(results)}/{len(audio_files)}")
    print(f"Total audio duration: {format_timestamp(total_audio_duration)}")
    print(f"Total processing time: {format_timestamp(total_time)}")
    print(f"Overall speed: {total_audio_duration/total_time:.1f}x realtime")

# Process directory
os.makedirs("transcripts", exist_ok=True)
process_audio_directory("audio_files", "transcripts")
```
## Version Information

The package version is available for programmatic access:

```python
from faster_whisper import __version__
print(f"faster-whisper version: {__version__}")  # "1.2.0"
```

This can be useful for:
- Compatibility checking in applications
- Logging and debugging information
- Feature detection based on version
- Integration with package management systems