Python API for retrieving YouTube video transcripts and subtitles without browser automation
Core data classes for representing transcript metadata, collections, and content. These structures provide the foundation for all transcript operations in the library.
Container for all available transcripts for a specific video. Provides methods to search and filter transcripts by language and type (manual vs. generated).
class TranscriptList:
    """
    Container for all available transcripts for a specific video.

    Provides methods to search and filter transcripts by language and type
    (manual vs. generated).
    """

    def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages):
        """
        Internal constructor. Use YouTubeTranscriptApi.list() to create instances.

        Args:
            video_id (str): Video ID this list belongs to
            manually_created_transcripts (dict): Manual transcripts by language code
            generated_transcripts (dict): Generated transcripts by language code
            translation_languages (list): Available translation languages
        """

    def find_transcript(self, language_codes):
        """
        Find a transcript, trying the language codes in priority order.

        Manually created transcripts are preferred over generated ones.

        Args:
            language_codes (Iterable[str]): Language codes in priority order

        Returns:
            Transcript: First matching transcript found

        Raises:
            NoTranscriptFound: No transcript found for any requested language
        """

    def find_generated_transcript(self, language_codes):
        """
        Find an automatically generated transcript.

        Args:
            language_codes (Iterable[str]): Language codes in priority order

        Returns:
            Transcript: First matching generated transcript

        Raises:
            NoTranscriptFound: No generated transcript found
        """

    def find_manually_created_transcript(self, language_codes):
        """
        Find a manually created transcript.

        Args:
            language_codes (Iterable[str]): Language codes in priority order

        Returns:
            Transcript: First matching manual transcript

        Raises:
            NoTranscriptFound: No manual transcript found
        """

    def __iter__(self):
        """
        Iterate over all transcripts (manual first, then generated).

        Yields:
            Transcript: Each available transcript
        """

    @property
    def video_id(self):
        """str: Video ID this transcript list belongs to"""


Metadata and fetching interface for an individual transcript. Represents a specific language version of a video's subtitles.
class Transcript:
    """
    Metadata and fetching interface for an individual transcript.

    Represents a specific language version of a video's subtitles.
    """

    def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages):
        """
        Internal constructor. Access via TranscriptList methods.

        The descriptions below are taken from the same-named properties of
        this class; arguments without a matching property are hedged.

        Args:
            http_client: Client object for HTTP requests (type not shown here;
                presumably used by fetch() -- TODO confirm)
            video_id (str): Video ID this transcript belongs to
            url: Location of the transcript data (presumably a str -- TODO confirm)
            language (str): Human-readable language name
            language_code (str): Language code (e.g., 'en', 'es', 'fr')
            is_generated (bool): True if automatically generated, False if manually created
            translation_languages (list): Available languages for translation
        """

    def fetch(self, preserve_formatting=False):
        """
        Load the actual transcript content.

        Args:
            preserve_formatting (bool, optional): Keep HTML formatting tags. Defaults to False

        Returns:
            FetchedTranscript: Transcript with content and timing data

        Raises:
            PoTokenRequired: PO token required for this video
            YouTubeRequestFailed: HTTP request failed
        """

    def translate(self, language_code):
        """
        Create a translated version of this transcript.

        Args:
            language_code (str): Target language code for translation

        Returns:
            Transcript: New transcript object for translated version

        Raises:
            NotTranslatable: This transcript cannot be translated
            TranslationLanguageNotAvailable: Requested language not available
        """

    @property
    def video_id(self):
        """str: Video ID this transcript belongs to"""

    @property
    def language(self):
        """str: Human-readable language name"""

    @property
    def language_code(self):
        """str: Language code (e.g., 'en', 'es', 'fr')"""

    @property
    def is_generated(self):
        """bool: True if automatically generated, False if manually created"""

    @property
    def translation_languages(self):
        """list: Available languages for translation"""

    @property
    def is_translatable(self):
        """bool: True if this transcript can be translated"""


Complete transcript data with timing information. Contains the actual subtitle content as a sequence of time-stamped text snippets.
class FetchedTranscript:
    """
    Complete transcript data with timing information.

    Contains the actual subtitle content as a sequence of time-stamped text
    snippets.
    """

    def __init__(self, snippets, video_id, language, language_code, is_generated):
        """
        Fetched transcript with content. Created by Transcript.fetch().

        Args:
            snippets (List[FetchedTranscriptSnippet]): Transcript content
            video_id (str): Video ID
            language (str): Language name
            language_code (str): Language code
            is_generated (bool): Whether auto-generated
        """

    def to_raw_data(self):
        """
        Convert to raw dictionary format for serialization.

        Returns:
            List[Dict]: List of snippet dictionaries with text, start, duration
        """

    def __iter__(self):
        """
        Iterate over transcript snippets.

        Yields:
            FetchedTranscriptSnippet: Each text snippet with timing
        """

    def __getitem__(self, index):
        """
        Access snippet by index.

        Args:
            index (int): Snippet index

        Returns:
            FetchedTranscriptSnippet: Snippet at index
        """

    def __len__(self):
        """
        Get number of snippets.

        Returns:
            int: Number of transcript snippets
        """

    @property
    def snippets(self):
        """List[FetchedTranscriptSnippet]: All transcript snippets"""

    @property
    def video_id(self):
        """str: Video ID this transcript belongs to"""

    @property
    def language(self):
        """str: Human-readable language name"""

    @property
    def language_code(self):
        """str: Language code"""

    @property
    def is_generated(self):
        """bool: True if automatically generated"""


Individual text segment with precise timing information. Represents a single subtitle entry with start time and duration.
class FetchedTranscriptSnippet:
    """
    Individual text segment with precise timing information.

    Represents a single subtitle entry with start time and duration.
    """

    def __init__(self, text, start, duration):
        """
        Single transcript snippet with timing.

        Args:
            text (str): Transcript text content
            start (float): Start timestamp in seconds
            duration (float): Duration in seconds (screen display time, not speech duration)
        """

    @property
    def text(self):
        """str: Transcript text content"""

    @property
    def start(self):
        """float: Start timestamp in seconds"""

    @property
    def duration(self):
        """float: Duration in seconds (screen display time)"""


from youtube_transcript_api import YouTubeTranscriptApi
# NoTranscriptFound is caught below, so it must be imported too; without it a
# missing transcript would surface as a NameError instead of being handled.
from youtube_transcript_api import NoTranscriptFound

api = YouTubeTranscriptApi()
transcript_list = api.list('dQw4w9WgXcQ')

# Print all available transcripts
print(f"Available transcripts for {transcript_list.video_id}:")
for transcript in transcript_list:
    print(f" {transcript.language_code}: {transcript.language}")
    print(f" Generated: {transcript.is_generated}")
    print(f" Translatable: {transcript.is_translatable}")

# Find specific transcript types.
# Only the lookup sits inside each try; the success message goes in the else
# branch so the handler covers nothing but the call that can raise.
try:
    manual_en = transcript_list.find_manually_created_transcript(['en'])
except NoTranscriptFound:
    print("No manual English transcript available")
else:
    print(f"Found manual English transcript: {manual_en.language}")

try:
    auto_es = transcript_list.find_generated_transcript(['es'])
except NoTranscriptFound:
    print("No generated Spanish transcript available")
else:
    print(f"Found generated Spanish transcript: {auto_es.language}")


from youtube_transcript_api import YouTubeTranscriptApi
api = YouTubeTranscriptApi()
transcript_list = api.list('dQw4w9WgXcQ')

# Pick the English transcript (manual preferred over generated).
transcript = transcript_list.find_transcript(['en'])

# Plain string: this header has no placeholders, so no f-prefix is needed.
print("Transcript info:")
print(f" Video: {transcript.video_id}")
print(f" Language: {transcript.language} ({transcript.language_code})")
print(f" Generated: {transcript.is_generated}")
print(f" Translatable: {transcript.is_translatable}")

# Fetch content
fetched = transcript.fetch()
print(f"Fetched {len(fetched)} snippets")

# Translate if possible
if transcript.is_translatable:
    french = transcript.translate('fr')
    french_content = french.fetch()
    print(f"Translated to French: {len(french_content)} snippets")


from youtube_transcript_api import YouTubeTranscriptApi
api = YouTubeTranscriptApi()
transcript = api.fetch('dQw4w9WgXcQ')

# Basic information
print(f"Video: {transcript.video_id}")
print(f"Language: {transcript.language}")
print(f"Total snippets: {len(transcript)}")

# Iterate through content.
# Counting from 1 makes `shown` the number of snippets already printed, so the
# loop stops after exactly five (a 0-based `i >= 5` check printed six).
for shown, snippet in enumerate(transcript, start=1):
    end_time = snippet.start + snippet.duration
    print(f"[{snippet.start:.2f}-{end_time:.2f}s] {snippet.text}")
    if shown >= 5:  # Show first 5 snippets
        break

# Access specific snippets
first_snippet = transcript[0]
print(f"First snippet: '{first_snippet.text}' at {first_snippet.start}s")

# Convert to raw data for serialization
raw_data = transcript.to_raw_data()
print(f"Raw format: {raw_data[0]}")  # {'text': '...', 'start': 0.0, 'duration': 3.84}


from typing import List, Dict, Iterator, Iterable
from dataclasses import dataclass
# Internal translation language type
@dataclass
class _TranslationLanguage:
    """Internal translation language type."""

    # Presumably the human-readable language name -- TODO confirm
    language: str
    # Presumably the language code (e.g. 'en') -- TODO confirm
    language_code: str


Install with Tessl CLI
npx tessl i tessl/pypi-youtube-transcript-api