Python API for retrieving YouTube video transcripts and subtitles without browser automation
Classes for converting transcript data into various output formats. Supports JSON, plain text, SRT subtitles, WebVTT, and pretty-printed formats for different use cases.
Abstract base class defining the formatter interface. All concrete formatters inherit from this class.
class Formatter:
def format_transcript(self, transcript, **kwargs):
"""
Format a single transcript.
Args:
transcript (FetchedTranscript): Transcript to format
**kwargs: Formatter-specific options
Returns:
str: Formatted transcript string
Raises:
NotImplementedError: Must be implemented by subclasses
"""
def format_transcripts(self, transcripts, **kwargs):
"""
Format multiple transcripts.
Args:
transcripts (List[FetchedTranscript]): Transcripts to format
**kwargs: Formatter-specific options
Returns:
str: Formatted transcripts string
Raises:
NotImplementedError: Must be implemented by subclasses
"""

Converts transcript data to JSON format for programmatic processing and data interchange.
class JSONFormatter(Formatter):
def format_transcript(self, transcript, **kwargs):
"""
Convert transcript to JSON string.
Args:
transcript (FetchedTranscript): Transcript to format
**kwargs: Passed to json.dumps() (indent, ensure_ascii, etc.)
Returns:
str: JSON representation of transcript data
"""
def format_transcripts(self, transcripts, **kwargs):
"""
Convert multiple transcripts to JSON array string.
Args:
transcripts (List[FetchedTranscript]): Transcripts to format
**kwargs: Passed to json.dumps()
Returns:
str: JSON array of transcript data
"""

Converts transcripts to plain text with no timestamps. Useful for text analysis and content extraction.
class TextFormatter(Formatter):
def format_transcript(self, transcript, **kwargs):
"""
Convert transcript to plain text (no timestamps).
Args:
transcript (FetchedTranscript): Transcript to format
**kwargs: Unused
Returns:
str: Plain text with lines separated by newlines
"""
def format_transcripts(self, transcripts, **kwargs):
"""
Convert multiple transcripts to plain text.
Args:
transcripts (List[FetchedTranscript]): Transcripts to format
**kwargs: Unused
Returns:
str: Plain text with transcripts separated by triple newlines
"""

Produces human-readable formatted output using Python's pprint module, for debugging and inspection.
class PrettyPrintFormatter(Formatter):
def format_transcript(self, transcript, **kwargs):
"""
Pretty print transcript data.
Args:
transcript (FetchedTranscript): Transcript to format
**kwargs: Passed to pprint.pformat()
Returns:
str: Pretty formatted transcript representation
"""
def format_transcripts(self, transcripts, **kwargs):
"""
Pretty print multiple transcripts.
Args:
transcripts (List[FetchedTranscript]): Transcripts to format
**kwargs: Passed to pprint.pformat()
Returns:
str: Pretty formatted list of transcripts
"""

Creates SRT (SubRip) subtitle files compatible with video players and subtitle software.
class SRTFormatter(Formatter):
def format_transcript(self, transcript, **kwargs):
"""
Convert transcript to SRT subtitle format.
Args:
transcript (FetchedTranscript): Transcript to format
**kwargs: Unused
Returns:
str: SRT formatted subtitles with sequence numbers and timestamps
"""
def format_transcripts(self, transcripts, **kwargs):
"""
Convert multiple transcripts to SRT format.
Args:
transcripts (List[FetchedTranscript]): Transcripts to format
**kwargs: Unused
Returns:
str: Combined SRT formatted subtitles
"""

Creates WebVTT subtitle files for web video players and HTML5 video elements.
class WebVTTFormatter(Formatter):
def format_transcript(self, transcript, **kwargs):
"""
Convert transcript to WebVTT subtitle format.
Args:
transcript (FetchedTranscript): Transcript to format
**kwargs: Unused
Returns:
str: WebVTT formatted subtitles with WEBVTT header
"""
def format_transcripts(self, transcripts, **kwargs):
"""
Convert multiple transcripts to WebVTT format.
Args:
transcripts (List[FetchedTranscript]): Transcripts to format
**kwargs: Unused
Returns:
str: Combined WebVTT formatted subtitles
"""

Utility class for loading formatters by type string. Provides a convenient interface for dynamic formatter selection.
class FormatterLoader:
TYPES = {
"json": JSONFormatter,
"pretty": PrettyPrintFormatter,
"text": TextFormatter,
"webvtt": WebVTTFormatter,
"srt": SRTFormatter,
}
def load(self, formatter_type="pretty"):
"""
Load formatter by type string.
Args:
formatter_type (str): Formatter type name. Defaults to "pretty"
Returns:
Formatter: Formatter instance
Raises:
UnknownFormatterType: Invalid formatter type
"""
class UnknownFormatterType(Exception):
def __init__(self, formatter_type):
"""
Exception for invalid formatter types.
Args:
formatter_type (str): The invalid formatter type
"""

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import JSONFormatter, TextFormatter
api = YouTubeTranscriptApi()
transcript = api.fetch('dQw4w9WgXcQ')
# JSON format
json_formatter = JSONFormatter()
json_output = json_formatter.format_transcript(transcript)
print(json_output)
# Plain text format
text_formatter = TextFormatter()
text_output = text_formatter.format_transcript(transcript)
print(text_output)

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import SRTFormatter, WebVTTFormatter
api = YouTubeTranscriptApi()
transcript = api.fetch('dQw4w9WgXcQ')
# Create SRT subtitle file
srt_formatter = SRTFormatter()
srt_content = srt_formatter.format_transcript(transcript)
with open('subtitles.srt', 'w', encoding='utf-8') as f:
f.write(srt_content)
# Create WebVTT subtitle file
webvtt_formatter = WebVTTFormatter()
webvtt_content = webvtt_formatter.format_transcript(transcript)
with open('subtitles.vtt', 'w', encoding='utf-8') as f:
f.write(webvtt_content)

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import FormatterLoader
api = YouTubeTranscriptApi()
transcript = api.fetch('dQw4w9WgXcQ')
loader = FormatterLoader()
# Load different formatters dynamically
for format_type in ['json', 'text', 'srt', 'webvtt', 'pretty']:
formatter = loader.load(format_type)
output = formatter.format_transcript(transcript)
print(f"=== {format_type.upper()} ===")
print(output[:200] + "..." if len(output) > 200 else output)
print()

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import JSONFormatter
import json
api = YouTubeTranscriptApi()
transcript = api.fetch('dQw4w9WgXcQ')
json_formatter = JSONFormatter()
# Pretty printed JSON
pretty_json = json_formatter.format_transcript(transcript, indent=2, ensure_ascii=False)
print(pretty_json)
# Compact JSON
compact_json = json_formatter.format_transcript(transcript, separators=(',', ':'))
print(compact_json)

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
api = YouTubeTranscriptApi()
# Get transcripts for several videos
video_ids = ['dQw4w9WgXcQ', 'jNQXAC9IVRw']
transcripts = []
for video_id in video_ids:
try:
transcript = api.fetch(video_id)
transcripts.append(transcript)
except Exception as e:
print(f"Failed to fetch {video_id}: {e}")
# Format all transcripts together
if transcripts:
text_formatter = TextFormatter()
combined_text = text_formatter.format_transcripts(transcripts)
print(combined_text)

from typing import List
from youtube_transcript_api._transcripts import FetchedTranscript
# Formatter interface types
FormatterType = str # One of: "json", "text", "pretty", "srt", "webvtt"
FormatterKwargs = dict # Formatter-specific keyword arguments

Install with Tessl CLI
npx tessl i tessl/pypi-youtube-transcript-api