Python library for downloading YouTube videos with comprehensive stream management and metadata extraction capabilities.
—
Caption track extraction and conversion to .srt format with support for multiple languages and automatic subtitle generation from YouTube videos.
Represents an individual caption track with language-specific subtitle data and conversion capabilities.
class Caption:
def __init__(self, caption_track: Dict):
"""
Initialize a Caption object.
Args:
caption_track (dict): Caption track metadata dictionary
"""

Access caption track information and content.

@property
def url(self) -> str:
"""Get the URL for downloading the caption track."""
@property
def name(self) -> str:
"""Get the human-readable name of the caption track (e.g., 'English', 'Spanish')."""
@property
def code(self) -> str:
"""Get the language code for the caption track (e.g., 'en', 'es', 'fr')."""
@property
def xml_captions(self) -> str:
"""Get the raw XML caption data from YouTube."""
@property
def json_captions(self) -> dict:
"""Get the parsed JSON caption data."""

Convert caption data between formats.

def generate_srt_captions(self) -> str:
"""
Convert the caption track to SRT (SubRip) format.
Returns:
str: Caption content in SRT format with timestamps and text
"""

Download caption files with various format options.

def download(
self,
title: str,
srt: bool = True,
output_path: Optional[str] = None,
filename_prefix: Optional[str] = None
) -> str:
"""
Download the caption track to a file.
Args:
title (str): Base filename for the caption file
srt (bool): Convert to SRT format (default: True)
output_path (str, optional): Directory to save the file
filename_prefix (str, optional): Prefix to add to filename
Returns:
str: Path to the downloaded caption file
"""

Utility methods for caption format conversion.

@staticmethod
def float_to_srt_time_format(d: float) -> str:
"""
Convert a float timestamp to SRT time format.
Args:
d (float): Time in seconds as a float
Returns:
str: Time in SRT format (HH:MM:SS,mmm)
"""
@staticmethod
def xml_caption_to_srt(xml_captions: str) -> str:
"""
Convert XML caption data to SRT format.
Args:
xml_captions (str): Raw XML caption content
Returns:
str: Caption content converted to SRT format
"""

Query interface for caption collections providing dictionary-like access to caption tracks by language code.

class CaptionQuery:
def __init__(self, captions: List[Caption]):
"""
Initialize CaptionQuery with a list of caption tracks.
Args:
captions (List[Caption]): List of available caption tracks
"""

Access caption tracks by language code and iterate through available captions.

def __getitem__(self, lang_code: str) -> Caption:
"""
Get caption track by language code.
Args:
lang_code (str): Language code (e.g., 'en', 'es', 'fr')
Returns:
Caption: Caption track for the specified language
Raises:
KeyError: If language code is not found
"""
def __len__(self) -> int:
"""
Get the number of available caption tracks.
Returns:
int: Number of caption tracks
"""
def __iter__(self) -> Iterator[Caption]:
"""
Iterate through all available caption tracks.
Returns:
Iterator[Caption]: Iterator over caption tracks
"""
### Deprecated Methods
Legacy methods maintained for backward compatibility.
```python { .api }
def get_by_language_code(self, lang_code: str) -> Optional[Caption]:
"""
Get caption track by language code.
**DEPRECATED**: Use dictionary-style access with captions[lang_code] instead.
Args:
lang_code (str): Language code (e.g., 'en', 'es')
Returns:
Caption or None: Caption track for the specified language
"""
def all(self) -> List[Caption]:
"""
Get all the results represented by this query as a list.
**DEPRECATED**: CaptionQuery can be treated as a dictionary/iterable directly.
Returns:
List[Caption]: All caption tracks
"""
```

```python
from pytube import YouTube
# Example: list the caption tracks of a video and download the English one.

# Get video with captions
yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0')

# Check available caption tracks
print("Available captions:")
for caption in yt.captions:
    print(f"- {caption.name} ({caption.code})")

# Download English captions
if 'en' in yt.captions:
    caption = yt.captions['en']
    caption.download(title=yt.title)
    print(f"Downloaded captions: {caption.name}")

from pytube import YouTube
# Example: convert the English caption track to SRT and save it manually.
yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0')

# Get English captions and convert to SRT
if 'en' in yt.captions:
    caption = yt.captions['en']

    # Generate SRT content
    srt_content = caption.generate_srt_captions()

    # Save to custom file
    with open('custom_captions.srt', 'w', encoding='utf-8') as f:
        f.write(srt_content)

    print("SRT file created: custom_captions.srt")

from pytube import YouTube
import os

# Example: download every available caption track into one directory.
yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0')

# Create captions directory
captions_dir = "captions"
os.makedirs(captions_dir, exist_ok=True)

# Download all available caption tracks
for caption in yt.captions:
    # A failure on one track is reported and must not stop the remaining ones.
    try:
        file_path = caption.download(
            title=yt.title,
            output_path=captions_dir,
            filename_prefix=f"{caption.code}_"
        )
        print(f"Downloaded {caption.name}: {file_path}")
    except Exception as e:
        print(f"Failed to download {caption.name}: {e}")

from pytube import YouTube
# Example: inspect the raw XML/JSON caption data and the generated SRT text.
yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0')

if 'en' in yt.captions:
    caption = yt.captions['en']

    # Get raw caption data
    xml_data = caption.xml_captions
    json_data = caption.json_captions

    print(f"XML data length: {len(xml_data)} characters")
    print(f"JSON entries: {len(json_data.get('events', []))}")

    # Convert to SRT and analyze
    srt_content = caption.generate_srt_captions()
    srt_lines = srt_content.split('\n')
    # Entries are counted by the blank-line separators between them.
    subtitle_count = srt_content.count('\n\n') + 1

    print(f"SRT content: {len(srt_lines)} lines")
    print(f"Number of subtitles: {subtitle_count}")

from pytube import YouTube
import re

# Example: strip numbering and timestamps from SRT output to get plain text.
yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0')

if 'en' in yt.captions:
    caption = yt.captions['en']
    srt_content = caption.generate_srt_captions()

    # Extract all subtitle text (remove timestamps and numbering)
    subtitle_pattern = r'\d+\n\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}\n(.+?)(?=\n\n|\n\d+\n|\Z)'
    # DOTALL lets the captured text span the line breaks inside one entry.
    matches = re.findall(subtitle_pattern, srt_content, re.DOTALL)

    all_text = ' '.join(match.replace('\n', ' ') for match in matches)
    print(f"Full transcript: {all_text[:200]}...")

from pytube import YouTube
# Example: pick a preferred language, falling back to the first available track.
yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0')

# Check if captions are available
if len(yt.captions) == 0:
    print("No captions available for this video")
else:
    print(f"Found {len(yt.captions)} caption tracks")

    # Try to get specific language with fallback
    preferred_languages = ['en', 'en-US', 'en-GB']
    selected_caption = None

    for lang in preferred_languages:
        if lang in yt.captions:
            selected_caption = yt.captions[lang]
            break

    if selected_caption:
        try:
            selected_caption.download(title=yt.title)
            print(f"Downloaded captions: {selected_caption.name}")
        except Exception as e:
            print(f"Download failed: {e}")
    else:
        # Fall back to first available caption
        # (safe here: this branch only runs when at least one track exists)
        first_caption = next(iter(yt.captions))
        print(f"Using fallback caption: {first_caption.name}")
        first_caption.download(title=yt.title)

from pytube import YouTube
import json
def extract_captions_for_timerange(caption, start_seconds, end_seconds):
    """Extract captions for a specific time range.

    Args:
        caption: Object whose ``json_captions`` attribute is a mapping with
            an optional ``'events'`` list of event dicts.
        start_seconds: Start of the requested window, in seconds.
        end_seconds: End of the requested window, in seconds.

    Returns:
        list: One dict per matching event, in event order, with keys
        ``'start'`` and ``'end'`` (seconds) and ``'text'`` (the event's
        ``'utf8'`` segments joined and stripped).
    """
    selected_captions = []

    for event in caption.json_captions.get('events', []):
        # Events lacking timing information cannot be placed on the timeline.
        if 'tStartMs' not in event or 'dDurationMs' not in event:
            continue

        start_ms = event['tStartMs']
        start_time = start_ms / 1000
        end_time = (start_ms + event['dDurationMs']) / 1000

        # Check if this caption overlaps with our time range; the comparison
        # is strict, so an event that merely touches a window edge is excluded.
        if not (start_time < end_seconds and end_time > start_seconds):
            continue

        # Events without text segments contribute nothing to the result.
        if 'segs' not in event:
            continue

        text = ''.join(seg.get('utf8', '') for seg in event['segs'])
        selected_captions.append({
            'start': start_time,
            'end': end_time,
            'text': text.strip()
        })

    return selected_captions
# Usage
yt = YouTube('https://www.youtube.com/watch?v=9bZkp7q19f0')

if 'en' in yt.captions:
    caption = yt.captions['en']

    # Get captions for first 60 seconds
    timerange_captions = extract_captions_for_timerange(caption, 0, 60)

    for cap in timerange_captions:
        print(f"{cap['start']:.1f}s - {cap['end']:.1f}s: {cap['text']}")

# Any is required by the CaptionTrackDict and CaptionEvent aliases below.
from typing import Any, Dict, List, Optional, Iterator
# Caption track metadata structure
CaptionTrackDict = Dict[str, Any]
# JSON caption event structure
CaptionEvent = Dict[str, Any]
```

Install with Tessl CLI

npx tessl i tessl/pypi-pytube