Tessl Tile for pypi/youtube-transcript-api@1.2.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

core-api.md data-structures.md error-handling.md formatters.md index.md proxy-config.md

data-structures.md docs/

0
# Data Structures
1

2
Core data classes for representing transcript metadata, collections, and content. These structures provide the foundation for all transcript operations in the library.
3

4
## Capabilities
5

6
### TranscriptList
7

8
Container for all available transcripts for a specific video. Provides methods to search and filter transcripts by language and type (manual vs. generated).
9

10
```python { .api }
11
class TranscriptList:
12
    def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages):
13
        """
14
        Internal constructor. Use YouTubeTranscriptApi.list() to create instances.
15

16
        Args:
17
            video_id (str): Video ID this list belongs to
18
            manually_created_transcripts (dict): Manual transcripts by language code
19
            generated_transcripts (dict): Generated transcripts by language code
20
            translation_languages (list): Available translation languages
21
        """
22

23
    def find_transcript(self, language_codes):
24
        """
25
        Find transcript with language priority. Prefers manual over generated.
26

27
        Args:
28
            language_codes (Iterable[str]): Language codes in priority order
29

30
        Returns:
31
            Transcript: First matching transcript found
32

33
        Raises:
34
            NoTranscriptFound: No transcript found for any requested language
35
        """
36

37
    def find_generated_transcript(self, language_codes):
38
        """
39
        Find automatically generated transcript.
40

41
        Args:
42
            language_codes (Iterable[str]): Language codes in priority order
43

44
        Returns:
45
            Transcript: First matching generated transcript
46

47
        Raises:
48
            NoTranscriptFound: No generated transcript found
49
        """
50

51
    def find_manually_created_transcript(self, language_codes):
52
        """
53
        Find manually created transcript.
54

55
        Args:
56
            language_codes (Iterable[str]): Language codes in priority order
57

58
        Returns:
59
            Transcript: First matching manual transcript
60

61
        Raises:
62
            NoTranscriptFound: No manual transcript found
63
        """
64

65
    def __iter__(self):
66
        """
67
        Iterate over all transcripts (manual first, then generated).
68

69
        Yields:
70
            Transcript: Each available transcript
71
        """
72

73
    @property
74
    def video_id(self):
75
        """str: Video ID this transcript list belongs to"""
76
```
77

78
### Transcript
79

80
Metadata and fetching interface for an individual transcript. Represents a specific language version of a video's subtitles.
81

82
```python { .api }
83
class Transcript:
84
    def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages):
85
        """
86
        Internal constructor. Access via TranscriptList methods.
87
        """
88

89
    def fetch(self, preserve_formatting=False):
90
        """
91
        Load the actual transcript content.
92

93
        Args:
94
            preserve_formatting (bool, optional): Keep HTML formatting tags. Defaults to False
95

96
        Returns:
97
            FetchedTranscript: Transcript with content and timing data
98

99
        Raises:
100
            PoTokenRequired: PO token required for this video
101
            YouTubeRequestFailed: HTTP request failed
102
        """
103

104
    def translate(self, language_code):
105
        """
106
        Create translated version of this transcript.
107

108
        Args:
109
            language_code (str): Target language code for translation
110

111
        Returns:
112
            Transcript: New transcript object for translated version
113

114
        Raises:
115
            NotTranslatable: This transcript cannot be translated
116
            TranslationLanguageNotAvailable: Requested language not available
117
        """
118

119
    @property
120
    def video_id(self):
121
        """str: Video ID this transcript belongs to"""
122

123
    @property
124
    def language(self):
125
        """str: Human-readable language name"""
126

127
    @property
128
    def language_code(self):
129
        """str: Language code (e.g., 'en', 'es', 'fr')"""
130

131
    @property
132
    def is_generated(self):
133
        """bool: True if automatically generated, False if manually created"""
134

135
    @property
136
    def translation_languages(self):
137
        """list: Available languages for translation"""
138

139
    @property
140
    def is_translatable(self):
141
        """bool: True if this transcript can be translated"""
142
```
143

144
### FetchedTranscript
145

146
Complete transcript data with timing information. Contains the actual subtitle content as a sequence of time-stamped text snippets.
147

148
```python { .api }
149
class FetchedTranscript:
150
    def __init__(self, snippets, video_id, language, language_code, is_generated):
151
        """
152
        Fetched transcript with content. Created by Transcript.fetch().
153

154
        Args:
155
            snippets (List[FetchedTranscriptSnippet]): Transcript content
156
            video_id (str): Video ID
157
            language (str): Language name
158
            language_code (str): Language code
159
            is_generated (bool): Whether auto-generated
160
        """
161

162
    def to_raw_data(self):
163
        """
164
        Convert to raw dictionary format for serialization.
165

166
        Returns:
167
            List[Dict]: List of snippet dictionaries with text, start, duration
168
        """
169

170
    def __iter__(self):
171
        """
172
        Iterate over transcript snippets.
173

174
        Yields:
175
            FetchedTranscriptSnippet: Each text snippet with timing
176
        """
177

178
    def __getitem__(self, index):
179
        """
180
        Access snippet by index.
181

182
        Args:
183
            index (int): Snippet index
184

185
        Returns:
186
            FetchedTranscriptSnippet: Snippet at index
187
        """
188

189
    def __len__(self):
190
        """
191
        Get number of snippets.
192

193
        Returns:
194
            int: Number of transcript snippets
195
        """
196

197
    @property
198
    def snippets(self):
199
        """List[FetchedTranscriptSnippet]: All transcript snippets"""
200

201
    @property
202
    def video_id(self):
203
        """str: Video ID this transcript belongs to"""
204

205
    @property
206
    def language(self):
207
        """str: Human-readable language name"""
208

209
    @property
210
    def language_code(self):
211
        """str: Language code"""
212

213
    @property
214
    def is_generated(self):
215
        """bool: True if automatically generated"""
216
```
217

218
### FetchedTranscriptSnippet
219

220
Individual text segment with precise timing information. Represents a single subtitle entry with start time and duration.
221

222
```python { .api }
223
class FetchedTranscriptSnippet:
224
    def __init__(self, text, start, duration):
225
        """
226
        Single transcript snippet with timing.
227

228
        Args:
229
            text (str): Transcript text content
230
            start (float): Start timestamp in seconds
231
            duration (float): Duration in seconds (screen display time, not speech duration)
232
        """
233

234
    @property
235
    def text(self):
236
        """str: Transcript text content"""
237

238
    @property
239
    def start(self):
240
        """float: Start timestamp in seconds"""
241

242
    @property
243
    def duration(self):
244
        """float: Duration in seconds (screen display time)"""
245
```
246

247
## Usage Examples
248

249
### Working with TranscriptList
250

251
```python
252
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound
253

254
api = YouTubeTranscriptApi()
255
transcript_list = api.list('dQw4w9WgXcQ')
256

257
# Print all available transcripts
258
print(f"Available transcripts for {transcript_list.video_id}:")
259
for transcript in transcript_list:
260
    print(f"  {transcript.language_code}: {transcript.language}")
261
    print(f"    Generated: {transcript.is_generated}")
262
    print(f"    Translatable: {transcript.is_translatable}")
263

264
# Find specific transcript types
265
try:
266
    manual_en = transcript_list.find_manually_created_transcript(['en'])
267
    print(f"Found manual English transcript: {manual_en.language}")
268
except NoTranscriptFound:
269
    print("No manual English transcript available")
270

271
try:
272
    auto_es = transcript_list.find_generated_transcript(['es'])
273
    print(f"Found generated Spanish transcript: {auto_es.language}")
274
except NoTranscriptFound:
275
    print("No generated Spanish transcript available")
276
```
277

278
### Working with Transcript Objects
279

280
```python
281
from youtube_transcript_api import YouTubeTranscriptApi
282

283
api = YouTubeTranscriptApi()
284
transcript_list = api.list('dQw4w9WgXcQ')
285
transcript = transcript_list.find_transcript(['en'])
286

287
print(f"Transcript info:")
288
print(f"  Video: {transcript.video_id}")
289
print(f"  Language: {transcript.language} ({transcript.language_code})")
290
print(f"  Generated: {transcript.is_generated}")
291
print(f"  Translatable: {transcript.is_translatable}")
292

293
# Fetch content
294
fetched = transcript.fetch()
295
print(f"Fetched {len(fetched)} snippets")
296

297
# Translate if possible
298
if transcript.is_translatable:
299
    french = transcript.translate('fr')
300
    french_content = french.fetch()
301
    print(f"Translated to French: {len(french_content)} snippets")
302
```
303

304
### Working with FetchedTranscript
305

306
```python
307
from youtube_transcript_api import YouTubeTranscriptApi
308

309
api = YouTubeTranscriptApi()
310
transcript = api.fetch('dQw4w9WgXcQ')
311

312
# Basic information
313
print(f"Video: {transcript.video_id}")
314
print(f"Language: {transcript.language}")
315
print(f"Total snippets: {len(transcript)}")
316

317
# Iterate through content
318
for i, snippet in enumerate(transcript):
319
    end_time = snippet.start + snippet.duration
320
    print(f"[{snippet.start:.2f}-{end_time:.2f}s] {snippet.text}")
321
    
322
    if i >= 5:  # Stop after printing the first 6 snippets (indices 0-5)
323
        break
324

325
# Access specific snippets
326
first_snippet = transcript[0]
327
print(f"First snippet: '{first_snippet.text}' at {first_snippet.start}s")
328

329
# Convert to raw data for serialization
330
raw_data = transcript.to_raw_data()
331
print(f"Raw format: {raw_data[0]}")  # {'text': '...', 'start': 0.0, 'duration': 3.84}
332
```
333

334
## Types
335

336
```python { .api }
337
from typing import List, Dict, Iterator, Iterable
338
from dataclasses import dataclass
339

340
# Internal translation language type
341
@dataclass
342
class _TranslationLanguage:
343
    language: str
344
    language_code: str
345
```

Version

Tile

Files

data-structures.md docs/

data-structures.md docs/