CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-faster-whisper

Faster Whisper transcription with CTranslate2 for high-performance speech recognition

Pending
Overview
Eval results
Files

docs/utilities.md

Utilities

Helper functions for timestamp formatting, model information, logging, and other utility operations that support the main speech recognition functionality.

Capabilities

Timestamp Formatting

Format floating-point seconds into human-readable timestamp strings with customizable format options.

def format_timestamp(
    seconds: float,
    always_include_hours: bool = False,
    decimal_marker: str = "."
) -> str:
    """
    Format seconds as timestamp string (HH:MM:SS.mmm or MM:SS.mmm).

    Args:
        seconds: Time in seconds (must be non-negative)
        always_include_hours: If True, always include hours even if 0
        decimal_marker: Character to use for decimal point (default: ".")

    Returns:
        Formatted timestamp string

    Examples:
        format_timestamp(65.123) -> "01:05.123"
        format_timestamp(65.123, always_include_hours=True) -> "00:01:05.123"
        format_timestamp(3661.5, decimal_marker=",") -> "01:01:01,500"

    Raises:
        AssertionError: If seconds is negative
    """
    # Documented contract: negative input raises AssertionError.
    assert seconds >= 0, "non-negative timestamp expected"

    # Work in integer milliseconds to avoid float rounding drift.
    milliseconds = round(seconds * 1000.0)

    hours, milliseconds = divmod(milliseconds, 3_600_000)
    minutes, milliseconds = divmod(milliseconds, 60_000)
    secs, milliseconds = divmod(milliseconds, 1_000)

    # Hours are shown only when non-zero, unless explicitly requested.
    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return f"{hours_marker}{minutes:02d}:{secs:02d}{decimal_marker}{milliseconds:03d}"

Logging

Access the module logger for debugging and monitoring transcription operations.

def get_logger():
    """
    Get the faster_whisper module logger.

    Returns:
        logging.Logger: Logger instance for faster_whisper module

    Usage:
        logger = get_logger()
        logger.info("Starting transcription")
        logger.warning("Low audio quality detected")
    """
    import logging  # local import keeps this documented snippet self-contained

    # logging.getLogger returns the same named singleton on every call,
    # so handlers/levels configured by callers persist.
    return logging.getLogger("faster_whisper")

Asset Path Management

Get the path to package assets directory containing model metadata and other resources.

def get_assets_path() -> str:
    """
    Get path to the faster_whisper assets directory.

    Returns:
        Absolute path to assets directory containing package resources

    Notes:
        - Contains model metadata and configuration files
        - Used internally by the library for resource access
    """
    import os  # local import keeps this documented snippet self-contained

    # Assets live in an "assets" directory next to this module.
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")

Internal Utilities

Additional utility functions used internally by the library.

def get_end(segments: list[dict]) -> float | None:
    """
    Get the end timestamp of the last segment with word-level timestamps.
    
    Args:
        segments: List of segment dictionaries with word timestamps
        
    Returns:
        End timestamp of last word, or last segment end time, or None if empty
        
    Notes:
        - Used internally for timestamp processing
        - Prefers word-level timestamps over segment timestamps
    """

class disabled_tqdm(tqdm):
    """
    tqdm subclass that never renders a progress bar.

    Forces ``disable=True`` on every construction so model downloads
    stay silent when progress display is not desired.
    """

    def __init__(self, *args, **kwargs):
        # Merge-override: whatever the caller passed, the bar is disabled.
        super().__init__(*args, **{**kwargs, "disable": True})

Usage Examples

Timestamp Formatting

from faster_whisper import format_timestamp

# Basic formatting: hours are omitted until the value reaches one hour
print(format_timestamp(65.123))      # "01:05.123"
print(format_timestamp(3661.5))      # "01:01:01.500"
print(format_timestamp(12.7))        # "00:12.700"

# Always include hours, even when the hours field is zero
print(format_timestamp(65.123, always_include_hours=True))  # "00:01:05.123"

# Custom decimal marker for European formats (e.g. SRT uses a comma)
print(format_timestamp(65.123, decimal_marker=","))  # "01:05,123"

Processing Transcription Results with Timestamps

from faster_whisper import WhisperModel, format_timestamp

model = WhisperModel("base")
# word_timestamps=True is what populates segment.words below
segments, info = model.transcribe("meeting.mp3", word_timestamps=True)

print(f"Meeting transcript - Duration: {format_timestamp(info.duration)}")
print("=" * 50)

# NOTE(review): in faster-whisper, segments is typically a lazy generator —
# transcription happens while iterating; iterate it only once.
for i, segment in enumerate(segments, 1):
    start_ts = format_timestamp(segment.start)
    end_ts = format_timestamp(segment.end)
    
    print(f"Segment {i}: [{start_ts} -> {end_ts}]")
    print(f"  Text: {segment.text}")
    print(f"  Confidence: {segment.avg_logprob:.2f}")
    
    # Per-word timings are only present when word_timestamps=True was passed
    if segment.words:
        print("  Word timings:")
        for word in segment.words:
            word_start = format_timestamp(word.start)
            word_end = format_timestamp(word.end)
            print(f"    {word.word} [{word_start}-{word_end}] (p={word.probability:.2f})")
    print()

Logging Configuration

from faster_whisper import WhisperModel, get_logger
import logging

# Configure logging: raise the faster_whisper logger's threshold to INFO
logger = get_logger()
logger.setLevel(logging.INFO)

# Add a custom handler so records are actually emitted somewhere (stderr here)
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

# Now faster_whisper operations will log information
model = WhisperModel("base")
segments, info = model.transcribe("audio.mp3")  # Will show loading/processing logs

Subtitle Generation

from faster_whisper import WhisperModel, format_timestamp

def generate_srt_subtitles(audio_path, output_path, max_chars_per_line=50):
    """
    Generate an SRT subtitle file from an audio (or video) file.

    Args:
        audio_path: Path to the input media file
        output_path: Path of the .srt file to write
        max_chars_per_line: Soft limit on subtitle line length; long text
            is word-wrapped, single words longer than the limit stay whole

    Side effects:
        Writes output_path (UTF-8) and prints a confirmation message.
    """
    import textwrap  # stdlib word wrapping replaces the manual loop

    model = WhisperModel("base")
    segments, info = model.transcribe(audio_path, word_timestamps=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        for i, segment in enumerate(segments, 1):
            # SRT timestamps are HH:MM:SS,mmm — hours always present,
            # comma as the decimal marker.
            start_time = format_timestamp(segment.start,
                                          always_include_hours=True,
                                          decimal_marker=",")
            end_time = format_timestamp(segment.end,
                                        always_include_hours=True,
                                        decimal_marker=",")

            # Wrap long text on word boundaries; textwrap avoids the
            # off-by-one of hand-rolled "current_line + ' ' + word" checks
            # and never splits a single long word across lines.
            text = segment.text.strip()
            if len(text) > max_chars_per_line:
                text = "\n".join(
                    textwrap.wrap(
                        text,
                        width=max_chars_per_line,
                        break_long_words=False,
                        break_on_hyphens=False,
                    )
                )

            # One SRT entry: index, time range, text, blank-line separator.
            f.write(f"{i}\n")
            f.write(f"{start_time} --> {end_time}\n")
            f.write(f"{text}\n\n")

    print(f"Generated subtitles: {output_path}")

# Generate subtitles
generate_srt_subtitles("video.mp4", "subtitles.srt")

Batch Processing with Progress Tracking

from faster_whisper import WhisperModel, format_timestamp, get_logger
import os
import time

def process_audio_directory(directory_path, output_dir):
    """
    Transcribe every audio file in a directory with progress tracking.

    Args:
        directory_path: Directory scanned (non-recursively) for audio files
        output_dir: Directory where one .txt transcript per input is written

    Returns:
        List of result dicts (filename, duration, language, confidence,
        processing_time, transcript) for the files that succeeded.

    Side effects:
        Writes transcript files, prints per-file progress and a summary,
        and logs errors via the faster_whisper logger.
    """
    logger = get_logger()
    model = WhisperModel("base")

    # Find audio files by case-insensitive extension match
    audio_extensions = {'.mp3', '.wav', '.m4a', '.flac', '.ogg'}
    audio_files = [
        filename
        for filename in os.listdir(directory_path)
        if any(filename.lower().endswith(ext) for ext in audio_extensions)
    ]

    print(f"Found {len(audio_files)} audio files to process")

    results = []
    start_time = time.time()

    for i, filename in enumerate(audio_files, 1):
        file_path = os.path.join(directory_path, filename)
        print(f"\n[{i}/{len(audio_files)}] Processing: {filename}")

        try:
            # Transcribe and time the call
            file_start = time.time()
            segments, info = model.transcribe(file_path)
            processing_time = time.time() - file_start

            # Joining the segments consumes the (lazy) segment iterator
            transcript_text = " ".join(segment.text for segment in segments)
            duration_str = format_timestamp(info.duration)

            results.append({
                'filename': filename,
                'duration': info.duration,
                'language': info.language,
                'confidence': info.language_probability,
                'processing_time': processing_time,
                'transcript': transcript_text,
            })

            print(f"  Duration: {duration_str}")
            print(f"  Language: {info.language} (confidence: {info.language_probability:.2f})")
            print(f"  Processing time: {processing_time:.2f}s")
            if processing_time > 0:  # guard against division by ~zero
                print(f"  Speed: {info.duration/processing_time:.1f}x realtime")

            # Save individual transcript, named after the audio file's stem
            base_name = os.path.splitext(filename)[0]
            output_file = os.path.join(output_dir, f"{base_name}.txt")
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(f"File: {filename}\n")
                f.write(f"Duration: {duration_str}\n")
                f.write(f"Language: {info.language}\n\n")
                f.write(transcript_text)

        except Exception as e:
            # Best-effort batch: record the failure and move on to the next file
            logger.error(f"Error processing {filename}: {e}")
            print(f"  ERROR: {e}")

    # Summary
    total_time = time.time() - start_time
    total_audio_duration = sum(r['duration'] for r in results)

    print("\n" + "=" * 50)
    print("Processing complete!")
    print(f"Files processed: {len(results)}/{len(audio_files)}")
    print(f"Total audio duration: {format_timestamp(total_audio_duration)}")
    print(f"Total processing time: {format_timestamp(total_time)}")
    if total_time > 0:  # guard against division by ~zero on empty/instant runs
        print(f"Overall speed: {total_audio_duration/total_time:.1f}x realtime")

    return results

# Process directory
os.makedirs("transcripts", exist_ok=True)
process_audio_directory("audio_files", "transcripts")

Version Information

The package version is available for programmatic access:

# The installed package exposes its version string for programmatic checks
from faster_whisper import __version__
print(f"faster-whisper version: {__version__}")  # e.g. "1.2.0"

This can be useful for:

  • Compatibility checking in applications
  • Logging and debugging information
  • Feature detection based on version
  • Integration with package management systems

Install with Tessl CLI

npx tessl i tessl/pypi-faster-whisper

docs

audio-processing.md

batched-processing.md

core-speech-recognition.md

index.md

utilities.md

voice-activity-detection.md

tile.json