Faster Whisper transcription with CTranslate2 for high-performance speech recognition
—
Helper functions for timestamp formatting, model information, logging, and other utility operations that support the main speech recognition functionality.
Format floating-point seconds into human-readable timestamp strings with customizable format options.
def format_timestamp(
    seconds: float,
    always_include_hours: bool = False,
    decimal_marker: str = "."
) -> str:
    """
    Format seconds as timestamp string (HH:MM:SS.mmm or MM:SS.mmm).

    Args:
        seconds: Time in seconds (must be non-negative)
        always_include_hours: If True, always include hours even if 0
        decimal_marker: Character to use for decimal point (default: ".")

    Returns:
        Formatted timestamp string

    Examples:
        format_timestamp(65.123) -> "01:05.123"
        format_timestamp(65.123, always_include_hours=True) -> "00:01:05.123"
        format_timestamp(3661.5, decimal_marker=",") -> "01:01:01,500"

    Raises:
        AssertionError: If seconds is negative
    """
    assert seconds >= 0, "non-negative timestamp expected"

    # Work in integer milliseconds to avoid float rounding artifacts.
    milliseconds = round(seconds * 1000.0)

    hours = milliseconds // 3_600_000
    milliseconds -= hours * 3_600_000

    minutes = milliseconds // 60_000
    milliseconds -= minutes * 60_000

    secs = milliseconds // 1_000
    milliseconds -= secs * 1_000

    # Hours are shown only when non-zero, unless explicitly requested.
    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return f"{hours_marker}{minutes:02d}:{secs:02d}{decimal_marker}{milliseconds:03d}"

# Access the module logger for debugging and monitoring transcription operations.
def get_logger():
    """
    Get the faster_whisper module logger.

    Returns:
        logging.Logger: Logger instance for faster_whisper module

    Usage:
        logger = get_logger()
        logger.info("Starting transcription")
        logger.warning("Low audio quality detected")
    """
    import logging  # local import keeps this helper self-contained

    # logging.getLogger returns the same singleton on every call, so all
    # callers share one configurable logger for the package.
    return logging.getLogger("faster_whisper")

# Get the path to package assets directory containing model metadata and other resources.
def get_assets_path() -> str:
    """
    Get path to the faster_whisper assets directory.

    Returns:
        Absolute path to assets directory containing package resources

    Notes:
        - Contains model metadata and configuration files
        - Used internally by the library for resource access
    """
    import os  # local import keeps this helper self-contained

    # Resolve relative to this module file so the path is correct
    # regardless of the process's current working directory.
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")

# Additional utility functions used internally by the library.
def get_end(segments: list[dict]) -> float | None:
"""
Get the end timestamp of the last segment with word-level timestamps.
Args:
segments: List of segment dictionaries with word timestamps
Returns:
End timestamp of last word, or last segment end time, or None if empty
Notes:
- Used internally for timestamp processing
- Prefers word-level timestamps over segment timestamps
"""
class disabled_tqdm(tqdm):
    """
    Disabled tqdm progress bar for silent model downloads.

    Used internally to suppress progress bars during model downloads
    when progress display is not desired.
    """

    def __init__(self, *args, **kwargs):
        # Force-disable output regardless of what the caller passed.
        kwargs["disable"] = True
        super().__init__(*args, **kwargs)


from faster_whisper import format_timestamp
# Basic formatting
print(format_timestamp(65.123))   # "01:05.123"
print(format_timestamp(3661.5))   # "01:01:01.500"
print(format_timestamp(12.7))     # "00:12.700"

# Always include hours
print(format_timestamp(65.123, always_include_hours=True))  # "00:01:05.123"

# Custom decimal marker for European formats
print(format_timestamp(65.123, decimal_marker=","))  # "01:05,123"

from faster_whisper import WhisperModel, format_timestamp
model = WhisperModel("base")
segments, info = model.transcribe("meeting.mp3", word_timestamps=True)

print(f"Meeting transcript - Duration: {format_timestamp(info.duration)}")
print("=" * 50)

for i, segment in enumerate(segments, 1):
    start_ts = format_timestamp(segment.start)
    end_ts = format_timestamp(segment.end)
    print(f"Segment {i}: [{start_ts} -> {end_ts}]")
    print(f" Text: {segment.text}")
    print(f" Confidence: {segment.avg_logprob:.2f}")
    if segment.words:
        print(" Word timings:")
        for word in segment.words:
            word_start = format_timestamp(word.start)
            word_end = format_timestamp(word.end)
            print(f" {word.word} [{word_start}-{word_end}] (p={word.probability:.2f})")
    print()

from faster_whisper import WhisperModel, get_logger
import logging

# Configure logging
logger = get_logger()
logger.setLevel(logging.INFO)

# Add custom handler
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

# Now faster_whisper operations will log information
model = WhisperModel("base")
segments, info = model.transcribe("audio.mp3")  # Will show loading/processing logs

from faster_whisper import WhisperModel, format_timestamp
def generate_srt_subtitles(audio_path, output_path, max_chars_per_line=50):
    """Generate SRT subtitle file from audio.

    Args:
        audio_path: Path to the input audio/video file.
        output_path: Destination path for the .srt file.
        max_chars_per_line: Maximum characters per subtitle line.
    """
    import textwrap

    model = WhisperModel("base")
    segments, info = model.transcribe(audio_path, word_timestamps=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        for i, segment in enumerate(segments, 1):
            # SRT requires HH:MM:SS,mmm timestamps (comma decimal marker).
            start_time = format_timestamp(segment.start,
                                          always_include_hours=True,
                                          decimal_marker=",")
            end_time = format_timestamp(segment.end,
                                        always_include_hours=True,
                                        decimal_marker=",")

            # Word-based line breaking via the standard library; long words
            # are kept whole, matching simple whitespace wrapping.
            text = "\n".join(
                textwrap.wrap(
                    segment.text.strip(),
                    width=max_chars_per_line,
                    break_long_words=False,
                    break_on_hyphens=False,
                )
            )

            # Write one SRT entry: index, time range, text, blank separator.
            f.write(f"{i}\n")
            f.write(f"{start_time} --> {end_time}\n")
            f.write(f"{text}\n\n")

    print(f"Generated subtitles: {output_path}")

# Generate subtitles
generate_srt_subtitles("video.mp4", "subtitles.srt")

from faster_whisper import WhisperModel, format_timestamp, get_logger
import os
import time

def process_audio_directory(directory_path, output_dir):
    """Process all audio files in directory with progress tracking.

    Args:
        directory_path: Directory scanned (non-recursively) for audio files.
        output_dir: Existing directory where per-file transcript .txt files
            are written.
    """
    logger = get_logger()
    model = WhisperModel("base")

    # Find audio files by extension (case-insensitive).
    audio_extensions = {'.mp3', '.wav', '.m4a', '.flac', '.ogg'}
    audio_files = [
        filename
        for filename in os.listdir(directory_path)
        if any(filename.lower().endswith(ext) for ext in audio_extensions)
    ]
    print(f"Found {len(audio_files)} audio files to process")

    # Process each file
    results = []
    start_time = time.time()
    for i, filename in enumerate(audio_files, 1):
        file_path = os.path.join(directory_path, filename)
        print(f"\n[{i}/{len(audio_files)}] Processing: {filename}")
        try:
            # Transcribe
            file_start = time.time()
            segments, info = model.transcribe(file_path)
            processing_time = time.time() - file_start

            # Collect results
            transcript_text = " ".join(segment.text for segment in segments)
            duration_str = format_timestamp(info.duration)
            result = {
                'filename': filename,
                'duration': info.duration,
                'language': info.language,
                'confidence': info.language_probability,
                'processing_time': processing_time,
                'transcript': transcript_text
            }
            results.append(result)

            print(f" Duration: {duration_str}")
            print(f" Language: {info.language} (confidence: {info.language_probability:.2f})")
            print(f" Processing time: {processing_time:.2f}s")
            print(f" Speed: {info.duration/processing_time:.1f}x realtime")

            # Save individual transcript, named after the source file.
            base_name = os.path.splitext(filename)[0]
            output_file = os.path.join(output_dir, f"{base_name}.txt")
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(f"File: {filename}\n")
                f.write(f"Duration: {duration_str}\n")
                f.write(f"Language: {info.language}\n\n")
                f.write(transcript_text)
        except Exception as e:
            # Best-effort batch: log the failure and continue with the rest.
            logger.error(f"Error processing {filename}: {e}")
            print(f" ERROR: {e}")

    # Summary
    total_time = time.time() - start_time
    total_audio_duration = sum(r['duration'] for r in results)
    print("\n" + "=" * 50)
    print("Processing complete!")
    print(f"Files processed: {len(results)}/{len(audio_files)}")
    print(f"Total audio duration: {format_timestamp(total_audio_duration)}")
    print(f"Total processing time: {format_timestamp(total_time)}")
    # Guard against division by zero when the directory is empty.
    if total_time > 0:
        print(f"Overall speed: {total_audio_duration/total_time:.1f}x realtime")

# Process directory
os.makedirs("transcripts", exist_ok=True)
process_audio_directory("audio_files", "transcripts")

# The package version is available for programmatic access:
from faster_whisper import __version__

print(f"faster-whisper version: {__version__}")  # "1.2.0"
# This can be useful for:
Install with Tessl CLI
npx tessl i tessl/pypi-faster-whisper