Python API for retrieving YouTube video transcripts and subtitles without browser automation
Comprehensive exception hierarchy for handling all error scenarios in YouTube transcript retrieval. Provides detailed error messages and actionable guidance for common issues.
Root exception classes providing the foundation for all library-specific errors.
class YouTubeTranscriptApiException(Exception):
"""
Base exception for all library errors.
All exceptions raised by this library inherit from this class.
"""
class CouldNotRetrieveTranscript(YouTubeTranscriptApiException):
"""
Base class for transcript retrieval failures.
Attributes:
video_id (str): YouTube video ID that failed
cause (str): Detailed error description
"""
def __init__(self, video_id):
"""
Initialize transcript retrieval error.
Args:
video_id (str): Video ID that failed
"""
@property
def video_id(self):
"""str: Video ID that failed"""
@property
def cause(self):
"""str: Detailed error cause description"""

Errors related to video accessibility and availability.
class VideoUnavailable(CouldNotRetrieveTranscript):
"""
Video is no longer available on YouTube.
Common causes:
- Video deleted by uploader
- Video removed for policy violations
- Video made private
"""
class VideoUnplayable(CouldNotRetrieveTranscript):
"""
Video cannot be played due to restrictions.
Attributes:
reason (str): Primary reason for unplayability
sub_reasons (List[str]): Additional details
"""
def __init__(self, video_id, reason, sub_reasons):
"""
Initialize video unplayable error.
Args:
video_id (str): Video ID
reason (str): Primary reason
sub_reasons (List[str]): Additional details
"""
class InvalidVideoId(CouldNotRetrieveTranscript):
"""
Invalid video ID format provided.
Common causes:
- Passing full YouTube URL instead of video ID
- Malformed video ID
"""
class AgeRestricted(CouldNotRetrieveTranscript):
"""
Video is age-restricted and requires authentication.
Note:
Cookie authentication is currently disabled in the library.
"""

Errors related to transcript and subtitle availability.
class TranscriptsDisabled(CouldNotRetrieveTranscript):
"""
Subtitles/transcripts are disabled for this video.
Common causes:
- Uploader disabled closed captions
- Video doesn't have transcripts
- Channel settings prevent transcript access
"""
class NoTranscriptFound(CouldNotRetrieveTranscript):
"""
No transcript found for requested languages.
Attributes:
_requested_language_codes (Iterable[str]): Languages that were requested
_transcript_data (TranscriptList): Available transcripts for debugging
"""
def __init__(self, video_id, requested_language_codes, transcript_data):
"""
Initialize no transcript found error.
Args:
video_id (str): Video ID
requested_language_codes (Iterable[str]): Requested languages
transcript_data (TranscriptList): Available transcripts
"""

Errors related to transcript translation functionality.
class NotTranslatable(CouldNotRetrieveTranscript):
"""
Transcript cannot be translated.
Common causes:
- Transcript doesn't support translation
- Original transcript is already translated
"""
class TranslationLanguageNotAvailable(CouldNotRetrieveTranscript):
"""
Requested translation language is not available.
Common causes:
- Language code not supported by YouTube
- Translation not available for this transcript
"""

Errors related to network requests and IP blocking by YouTube.
class YouTubeRequestFailed(CouldNotRetrieveTranscript):
"""
HTTP request to YouTube failed.
Attributes:
reason (str): HTTP error details
"""
def __init__(self, video_id, http_error):
"""
Initialize YouTube request failure.
Args:
video_id (str): Video ID
http_error (HTTPError): Original HTTP error
"""
class RequestBlocked(CouldNotRetrieveTranscript):
"""
Requests blocked by YouTube (generic blocking).
Common causes:
- Too many requests from IP
- Cloud provider IP blocked
- Bot detection triggered
Supports proxy-specific error messages via with_proxy_config().
"""
def __init__(self, video_id):
"""
Initialize request blocked error.
Args:
video_id (str): Video ID
"""
def with_proxy_config(self, proxy_config):
"""
Add proxy configuration context for better error messages.
Args:
proxy_config (ProxyConfig): Proxy configuration used
Returns:
RequestBlocked: Self for method chaining
"""
class IpBlocked(RequestBlocked):
"""
IP address specifically blocked by YouTube.
More severe than RequestBlocked, indicates IP-level ban.
"""

Errors related to authentication and special access requirements.
class FailedToCreateConsentCookie(CouldNotRetrieveTranscript):
"""
Failed to automatically create consent cookie for GDPR compliance.
Common in EU regions where consent is required.
"""
class PoTokenRequired(CouldNotRetrieveTranscript):
"""
PO Token required for video access.
Rare error for videos requiring special authentication tokens.
"""
class YouTubeDataUnparsable(CouldNotRetrieveTranscript):
"""
YouTube response data cannot be parsed.
Usually indicates changes in YouTube's internal API.
"""

Errors related to cookie authentication (currently disabled).
class CookieError(YouTubeTranscriptApiException):
"""
Base class for cookie-related errors.
Note:
Cookie authentication is temporarily disabled.
"""
class CookiePathInvalid(CookieError):
"""
Invalid cookie file path provided.
Args:
cookie_path (Path): Invalid path that was provided
"""
def __init__(self, cookie_path):
"""
Initialize invalid cookie path error.
Args:
cookie_path (Path): Invalid cookie file path
"""
class CookieInvalid(CookieError):
"""
Cookie file is invalid or expired.
Args:
cookie_path (Path): Path to invalid cookie file
"""
def __init__(self, cookie_path):
"""
Initialize invalid cookie error.
Args:
cookie_path (Path): Cookie file path
"""

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api import (
VideoUnavailable,
TranscriptsDisabled,
NoTranscriptFound,
RequestBlocked,
InvalidVideoId
)
api = YouTubeTranscriptApi()
try:
transcript = api.fetch('dQw4w9WgXcQ')
print("Transcript retrieved successfully")
except VideoUnavailable:
print("Video is no longer available")
except TranscriptsDisabled:
print("Transcripts are disabled for this video")
except NoTranscriptFound:
print("No transcript found for requested languages")
except RequestBlocked as e:
print(f"Request blocked: {e}")
print("Consider using proxies or waiting before retrying")
except InvalidVideoId:
print("Invalid video ID - use video ID, not full URL")
except Exception as e:
print(f"Unexpected error: {e}")

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api import (
NoTranscriptFound,
VideoUnplayable,
YouTubeRequestFailed
)
api = YouTubeTranscriptApi()
try:
transcript = api.fetch('invalid_video_id', languages=['es', 'en'])
except NoTranscriptFound as e:
print(f"Video ID: {e.video_id}")
print(f"Requested languages: {e._requested_language_codes}")
print(f"Available transcripts:\n{e._transcript_data}")
except VideoUnplayable as e:
print(f"Video unplayable: {e.reason}")
if e.sub_reasons:
print("Additional details:")
for reason in e.sub_reasons:
print(f" - {reason}")
except YouTubeRequestFailed as e:
print(f"HTTP request failed: {e.reason}")

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.proxies import WebshareProxyConfig
from youtube_transcript_api import RequestBlocked, IpBlocked
proxy_config = WebshareProxyConfig('username', 'password')
api = YouTubeTranscriptApi(proxy_config=proxy_config)
try:
    transcript = api.fetch('dQw4w9WgXcQ')
except IpBlocked as e:
    # IpBlocked subclasses RequestBlocked, so it has to be caught first;
    # listed after RequestBlocked this handler could never run.
    print(f"IP blocked: {e}")
except RequestBlocked as e:
    # Error message will include Webshare-specific guidance
    print(f"Blocked despite Webshare proxy: {e}")

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api import RequestBlocked, IpBlocked
import time
import random
def fetch_with_retry(video_id, max_retries=3):
    """
    Fetch a transcript, retrying with exponential backoff when the request is blocked.

    Args:
        video_id (str): YouTube video ID (not a full URL)
        max_retries (int): Total number of attempts; must be at least 1

    Returns:
        Whatever ``YouTubeTranscriptApi.fetch`` returns for the video.

    Raises:
        ValueError: If ``max_retries`` is less than 1
        RequestBlocked: If the final attempt is still blocked (this also
            covers IpBlocked, which is a subclass)
    """
    # Without this guard the loop below would not run at all and the
    # function would silently return None.
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    api = YouTubeTranscriptApi()
    for attempt in range(max_retries):
        try:
            return api.fetch(video_id)
        except RequestBlocked:
            # IpBlocked subclasses RequestBlocked, so one clause handles both.
            # Every other exception is not retried and propagates unchanged.
            if attempt == max_retries - 1:
                # Bare raise re-raises the active exception unchanged
                raise
            # Exponential backoff with jitter
            delay = (2 ** attempt) + random.uniform(0, 1)
            print(f"Request blocked, retrying in {delay:.1f}s...")
            time.sleep(delay)
# Usage
try:
transcript = fetch_with_retry('dQw4w9WgXcQ')
print("Successfully retrieved transcript")
except Exception as e:
print(f"Failed after retries: {e}")

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api import NoTranscriptFound
def fetch_best_transcript(video_id, preferred_languages=None):
    """
    Fetch the best available transcript for a video.

    Tries the preferred languages first, then the first transcript the video
    offers, and finally an auto-generated English transcript.

    Args:
        video_id (str): YouTube video ID (not a full URL)
        preferred_languages (Iterable[str]): Language codes in priority order;
            defaults to ['en', 'es', 'fr']

    Returns:
        The fetched transcript returned by ``Transcript.fetch()``.

    Raises:
        Exception: If no transcript could be found for this video. Errors
            raised while listing the transcripts propagate unchanged.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if preferred_languages is None:
        preferred_languages = ['en', 'es', 'fr']

    api = YouTubeTranscriptApi()
    # Bound outside the try so the fallbacks below can always use it.
    transcript_list = api.list(video_id)

    # Try preferred languages first
    try:
        return transcript_list.find_transcript(preferred_languages).fetch()
    except NoTranscriptFound:
        pass

    # Fall back to the first available transcript, if the video has any
    first_available = next(iter(transcript_list), None)
    if first_available is not None:
        try:
            return first_available.fetch()
        except Exception:
            # Deliberate best-effort: any failure here moves on to the
            # generated-transcript fallback below.
            pass

    # Last resort: generated transcripts only
    try:
        return transcript_list.find_generated_transcript(['en']).fetch()
    except NoTranscriptFound as err:
        raise Exception("No transcripts available for this video") from err
# Usage
try:
transcript = fetch_best_transcript('dQw4w9WgXcQ')
print(f"Retrieved transcript in {transcript.language}")
except Exception as e:
print(f"Could not retrieve any transcript: {e}")

from typing import List, Iterable, Optional
from pathlib import Path
from typing import Union  # required by the aliases below
from requests import HTTPError

# Exception hierarchy types
# These are typing aliases meant for annotations. A typing.Union cannot be
# used in an `except` clause; catch a tuple of the classes instead.
TranscriptRetrievalError = CouldNotRetrieveTranscript
NetworkError = Union[RequestBlocked, IpBlocked, YouTubeRequestFailed]
VideoError = Union[VideoUnavailable, VideoUnplayable, InvalidVideoId]
TranscriptError = Union[TranscriptsDisabled, NoTranscriptFound]
TranslationError = Union[NotTranslatable, TranslationLanguageNotAvailable]

Install with Tessl CLI
npx tessl i tessl/pypi-youtube-transcript-api