# Utilities

Helper functions for timestamp formatting, model information, logging, and other utility operations that support the main speech recognition functionality.

## Capabilities

### Timestamp Formatting

Format floating-point seconds into human-readable timestamp strings with customizable format options.

```python { .api }
def format_timestamp(
    seconds: float,
    always_include_hours: bool = False,
    decimal_marker: str = "."
) -> str:
    """
    Format seconds as timestamp string (HH:MM:SS.mmm or MM:SS.mmm).

    Args:
        seconds: Time in seconds (must be non-negative)
        always_include_hours: If True, always include hours even if 0
        decimal_marker: Character to use for decimal point (default: ".")

    Returns:
        Formatted timestamp string

    Examples:
        format_timestamp(65.123) -> "01:05.123"
        format_timestamp(65.123, always_include_hours=True) -> "00:01:05.123"
        format_timestamp(3661.5, decimal_marker=",") -> "01:01:01,500"

    Raises:
        AssertionError: If seconds is negative
    """
```
### Logging

Access the module logger for debugging and monitoring transcription operations.

```python { .api }
def get_logger():
    """
    Get the faster_whisper module logger.

    Returns:
        logging.Logger: Logger instance for faster_whisper module

    Usage:
        logger = get_logger()
        logger.info("Starting transcription")
        logger.warning("Low audio quality detected")
    """
```
### Asset Path Management

Get the path to package assets directory containing model metadata and other resources.

```python { .api }
def get_assets_path() -> str:
    """
    Get path to the faster_whisper assets directory.

    Returns:
        Absolute path to assets directory containing package resources

    Notes:
        - Contains model metadata and configuration files
        - Used internally by the library for resource access
    """
```
### Internal Utilities

Additional utility functions used internally by the library.

```python { .api }
def get_end(segments: list[dict]) -> float | None:
    """
    Get the end timestamp of the last segment with word-level timestamps.

    Args:
        segments: List of segment dictionaries with word timestamps

    Returns:
        End timestamp of last word, or last segment end time, or None if empty

    Notes:
        - Used internally for timestamp processing
        - Prefers word-level timestamps over segment timestamps
    """

class disabled_tqdm(tqdm):
    """
    Disabled tqdm progress bar for silent model downloads.

    Used internally to suppress progress bars during model downloads
    when progress display is not desired.
    """
    def __init__(self, *args, **kwargs):
        kwargs["disable"] = True
        super().__init__(*args, **kwargs)
```
## Usage Examples

### Timestamp Formatting

```python
from faster_whisper import format_timestamp

# Basic formatting
print(format_timestamp(65.123))    # "01:05.123"
print(format_timestamp(3661.5))    # "01:01:01.500"
print(format_timestamp(12.7))      # "00:12.700"

# Always include hours
print(format_timestamp(65.123, always_include_hours=True))  # "00:01:05.123"

# Custom decimal marker for European formats
print(format_timestamp(65.123, decimal_marker=","))  # "01:05,123"
```
### Processing Transcription Results with Timestamps

```python
from faster_whisper import WhisperModel, format_timestamp

model = WhisperModel("base")
segments, info = model.transcribe("meeting.mp3", word_timestamps=True)

print(f"Meeting transcript - Duration: {format_timestamp(info.duration)}")
print("=" * 50)

for i, segment in enumerate(segments, 1):
    start_ts = format_timestamp(segment.start)
    end_ts = format_timestamp(segment.end)

    print(f"Segment {i}: [{start_ts} -> {end_ts}]")
    print(f"  Text: {segment.text}")
    print(f"  Confidence: {segment.avg_logprob:.2f}")

    if segment.words:
        print("  Word timings:")
        for word in segment.words:
            word_start = format_timestamp(word.start)
            word_end = format_timestamp(word.end)
            print(f"    {word.word} [{word_start}-{word_end}] (p={word.probability:.2f})")
    print()
```
### Logging Configuration

```python
from faster_whisper import WhisperModel, get_logger
import logging

# Configure logging
logger = get_logger()
logger.setLevel(logging.INFO)

# Add custom handler
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

# Now faster_whisper operations will log information
model = WhisperModel("base")
segments, info = model.transcribe("audio.mp3")  # Will show loading/processing logs
```
### Subtitle Generation

```python
from faster_whisper import WhisperModel, format_timestamp

def generate_srt_subtitles(audio_path, output_path, max_chars_per_line=50):
    """Generate SRT subtitle file from audio."""
    model = WhisperModel("base")
    segments, info = model.transcribe(audio_path, word_timestamps=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        for i, segment in enumerate(segments, 1):
            # Format timestamps for SRT (HH:MM:SS,mmm)
            start_time = format_timestamp(segment.start,
                                          always_include_hours=True,
                                          decimal_marker=",")
            end_time = format_timestamp(segment.end,
                                        always_include_hours=True,
                                        decimal_marker=",")

            # Break long text into multiple lines
            text = segment.text.strip()
            if len(text) > max_chars_per_line:
                # Simple word-based line breaking
                words = text.split()
                lines = []
                current_line = ""

                for word in words:
                    if len(current_line + " " + word) <= max_chars_per_line:
                        current_line += (" " + word if current_line else word)
                    else:
                        if current_line:
                            lines.append(current_line)
                        current_line = word

                if current_line:
                    lines.append(current_line)

                text = "\n".join(lines)

            # Write SRT entry
            f.write(f"{i}\n")
            f.write(f"{start_time} --> {end_time}\n")
            f.write(f"{text}\n\n")

    print(f"Generated subtitles: {output_path}")

# Generate subtitles
generate_srt_subtitles("video.mp4", "subtitles.srt")
```
### Batch Processing with Progress Tracking

```python
from faster_whisper import WhisperModel, format_timestamp, get_logger
import os
import time

def process_audio_directory(directory_path, output_dir):
    """Process all audio files in directory with progress tracking."""
    logger = get_logger()
    model = WhisperModel("base")

    # Find audio files
    audio_extensions = {'.mp3', '.wav', '.m4a', '.flac', '.ogg'}
    audio_files = []

    for filename in os.listdir(directory_path):
        if any(filename.lower().endswith(ext) for ext in audio_extensions):
            audio_files.append(filename)

    print(f"Found {len(audio_files)} audio files to process")

    # Process each file
    results = []
    start_time = time.time()

    for i, filename in enumerate(audio_files, 1):
        file_path = os.path.join(directory_path, filename)
        print(f"\n[{i}/{len(audio_files)}] Processing: {filename}")

        try:
            # Transcribe
            file_start = time.time()
            segments, info = model.transcribe(file_path)
            processing_time = time.time() - file_start

            # Collect results
            transcript_text = " ".join(segment.text for segment in segments)
            duration_str = format_timestamp(info.duration)

            result = {
                'filename': filename,
                'duration': info.duration,
                'language': info.language,
                'confidence': info.language_probability,
                'processing_time': processing_time,
                'transcript': transcript_text
            }
            results.append(result)

            print(f"  Duration: {duration_str}")
            print(f"  Language: {info.language} (confidence: {info.language_probability:.2f})")
            print(f"  Processing time: {processing_time:.2f}s")
            print(f"  Speed: {info.duration/processing_time:.1f}x realtime")

            # Save individual transcript
            output_file = os.path.join(output_dir, f"{os.path.splitext(filename)[0]}.txt")
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(f"File: {filename}\n")
                f.write(f"Duration: {duration_str}\n")
                f.write(f"Language: {info.language}\n\n")
                f.write(transcript_text)

        except Exception as e:
            logger.error(f"Error processing {filename}: {e}")
            print(f"  ERROR: {e}")

    # Summary
    total_time = time.time() - start_time
    total_audio_duration = sum(r['duration'] for r in results)

    print(f"\n" + "="*50)
    print(f"Processing complete!")
    print(f"Files processed: {len(results)}/{len(audio_files)}")
    print(f"Total audio duration: {format_timestamp(total_audio_duration)}")
    print(f"Total processing time: {format_timestamp(total_time)}")
    print(f"Overall speed: {total_audio_duration/total_time:.1f}x realtime")

# Process directory
os.makedirs("transcripts", exist_ok=True)
process_audio_directory("audio_files", "transcripts")
```
## Version Information

The package version is available for programmatic access:

```python
from faster_whisper import __version__
print(f"faster-whisper version: {__version__}")  # "1.2.0"
```

This can be useful for:
- Compatibility checking in applications
- Logging and debugging information
- Feature detection based on version
- Integration with package management systems