or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-api.md data-structures.md error-handling.md formatters.md index.md proxy-config.md

data-structures.md docs/

0

# Data Structures

1

2

Core data classes for representing transcript metadata, collections, and content. These structures provide the foundation for all transcript operations in the library.

3

4

## Capabilities

5

6

### TranscriptList

7

8

Container for all available transcripts for a specific video. Provides methods to search and filter transcripts by language and type (manual vs. generated).

9

10

```python { .api }

11

class TranscriptList:

12

def __init__(self, video_id, manually_created_transcripts, generated_transcripts, translation_languages):

13

"""

14

Internal constructor. Use YouTubeTranscriptApi.list() to create instances.

15

16

Args:

17

video_id (str): Video ID this list belongs to

18

manually_created_transcripts (dict): Manual transcripts by language code

19

generated_transcripts (dict): Generated transcripts by language code

20

translation_languages (list): Available translation languages

21

"""

22

23

def find_transcript(self, language_codes):

24

"""

25

Find transcript with language priority. Prefers manual over generated.

26

27

Args:

28

language_codes (Iterable[str]): Language codes in priority order

29

30

Returns:

31

Transcript: First matching transcript found

32

33

Raises:

34

NoTranscriptFound: No transcript found for any requested language

35

"""

36

37

def find_generated_transcript(self, language_codes):

38

"""

39

Find automatically generated transcript.

40

41

Args:

42

language_codes (Iterable[str]): Language codes in priority order

43

44

Returns:

45

Transcript: First matching generated transcript

46

47

Raises:

48

NoTranscriptFound: No generated transcript found

49

"""

50

51

def find_manually_created_transcript(self, language_codes):

52

"""

53

Find manually created transcript.

54

55

Args:

56

language_codes (Iterable[str]): Language codes in priority order

57

58

Returns:

59

Transcript: First matching manual transcript

60

61

Raises:

62

NoTranscriptFound: No manual transcript found

63

"""

64

65

def __iter__(self):

66

"""

67

Iterate over all transcripts (manual first, then generated).

68

69

Yields:

70

Transcript: Each available transcript

71

"""

72

73

@property

74

def video_id(self):

75

"""str: Video ID this transcript list belongs to"""

76

```

77

78

### Transcript

79

80

Metadata and fetching interface for an individual transcript. Represents a specific language version of a video's subtitles.

81

82

```python { .api }

83

class Transcript:

84

def __init__(self, http_client, video_id, url, language, language_code, is_generated, translation_languages):

85

"""

86

Internal constructor. Access via TranscriptList methods.

87

"""

88

89

def fetch(self, preserve_formatting=False):

90

"""

91

Load the actual transcript content.

92

93

Args:

94

preserve_formatting (bool, optional): Keep HTML formatting tags. Defaults to False

95

96

Returns:

97

FetchedTranscript: Transcript with content and timing data

98

99

Raises:

100

PoTokenRequired: PO token required for this video

101

YouTubeRequestFailed: HTTP request failed

102

"""

103

104

def translate(self, language_code):

105

"""

106

Create translated version of this transcript.

107

108

Args:

109

language_code (str): Target language code for translation

110

111

Returns:

112

Transcript: New transcript object for translated version

113

114

Raises:

115

NotTranslatable: This transcript cannot be translated

116

TranslationLanguageNotAvailable: Requested language not available

117

"""

118

119

@property

120

def video_id(self):

121

"""str: Video ID this transcript belongs to"""

122

123

@property

124

def language(self):

125

"""str: Human-readable language name"""

126

127

@property

128

def language_code(self):

129

"""str: Language code (e.g., 'en', 'es', 'fr')"""

130

131

@property

132

def is_generated(self):

133

"""bool: True if automatically generated, False if manually created"""

134

135

@property

136

def translation_languages(self):

137

"""list: Available languages for translation"""

138

139

@property

140

def is_translatable(self):

141

"""bool: True if this transcript can be translated"""

142

```

143

144

### FetchedTranscript

145

146

Complete transcript data with timing information. Contains the actual subtitle content as a sequence of time-stamped text snippets.

147

148

```python { .api }

149

class FetchedTranscript:

150

def __init__(self, snippets, video_id, language, language_code, is_generated):

151

"""

152

Fetched transcript with content. Created by Transcript.fetch().

153

154

Args:

155

snippets (List[FetchedTranscriptSnippet]): Transcript content

156

video_id (str): Video ID

157

language (str): Language name

158

language_code (str): Language code

159

is_generated (bool): Whether auto-generated

160

"""

161

162

def to_raw_data(self):

163

"""

164

Convert to raw dictionary format for serialization.

165

166

Returns:

167

List[Dict]: List of snippet dictionaries with text, start, duration

168

"""

169

170

def __iter__(self):

171

"""

172

Iterate over transcript snippets.

173

174

Yields:

175

FetchedTranscriptSnippet: Each text snippet with timing

176

"""

177

178

def __getitem__(self, index):

179

"""

180

Access snippet by index.

181

182

Args:

183

index (int): Snippet index

184

185

Returns:

186

FetchedTranscriptSnippet: Snippet at index

187

"""

188

189

def __len__(self):

190

"""

191

Get number of snippets.

192

193

Returns:

194

int: Number of transcript snippets

195

"""

196

197

@property

198

def snippets(self):

199

"""List[FetchedTranscriptSnippet]: All transcript snippets"""

200

201

@property

202

def video_id(self):

203

"""str: Video ID this transcript belongs to"""

204

205

@property

206

def language(self):

207

"""str: Human-readable language name"""

208

209

@property

210

def language_code(self):

211

"""str: Language code"""

212

213

@property

214

def is_generated(self):

215

"""bool: True if automatically generated"""

216

```

217

218

### FetchedTranscriptSnippet

219

220

Individual text segment with precise timing information. Represents a single subtitle entry with start time and duration.

221

222

```python { .api }

223

class FetchedTranscriptSnippet:

224

def __init__(self, text, start, duration):

225

"""

226

Single transcript snippet with timing.

227

228

Args:

229

text (str): Transcript text content

230

start (float): Start timestamp in seconds

231

duration (float): Duration in seconds (screen display time, not speech duration)

232

"""

233

234

@property

235

def text(self):

236

"""str: Transcript text content"""

237

238

@property

239

def start(self):

240

"""float: Start timestamp in seconds"""

241

242

@property

243

def duration(self):

244

"""float: Duration in seconds (screen display time)"""

245

```

246

247

## Usage Examples

248

249

### Working with TranscriptList

250

251

```python

252

from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound

253

254

api = YouTubeTranscriptApi()

255

transcript_list = api.list('dQw4w9WgXcQ')

256

257

# Print all available transcripts

258

print(f"Available transcripts for {transcript_list.video_id}:")

259

for transcript in transcript_list:

260

print(f" {transcript.language_code}: {transcript.language}")

261

print(f" Generated: {transcript.is_generated}")

262

print(f" Translatable: {transcript.is_translatable}")

263

264

# Find specific transcript types

265

try:

266

manual_en = transcript_list.find_manually_created_transcript(['en'])

267

print(f"Found manual English transcript: {manual_en.language}")

268

except NoTranscriptFound:

269

print("No manual English transcript available")

270

271

try:

272

auto_es = transcript_list.find_generated_transcript(['es'])

273

print(f"Found generated Spanish transcript: {auto_es.language}")

274

except NoTranscriptFound:

275

print("No generated Spanish transcript available")

276

```

277

278

### Working with Transcript Objects

279

280

```python

281

from youtube_transcript_api import YouTubeTranscriptApi

282

283

api = YouTubeTranscriptApi()

284

transcript_list = api.list('dQw4w9WgXcQ')

285

transcript = transcript_list.find_transcript(['en'])

286

287

print("Transcript info:")

288

print(f" Video: {transcript.video_id}")

289

print(f" Language: {transcript.language} ({transcript.language_code})")

290

print(f" Generated: {transcript.is_generated}")

291

print(f" Translatable: {transcript.is_translatable}")

292

293

# Fetch content

294

fetched = transcript.fetch()

295

print(f"Fetched {len(fetched)} snippets")

296

297

# Translate if possible

298

if transcript.is_translatable:

299

french = transcript.translate('fr')

300

french_content = french.fetch()

301

print(f"Translated to French: {len(french_content)} snippets")

302

```

303

304

### Working with FetchedTranscript

305

306

```python

307

from youtube_transcript_api import YouTubeTranscriptApi

308

309

api = YouTubeTranscriptApi()

310

transcript = api.fetch('dQw4w9WgXcQ')

311

312

# Basic information

313

print(f"Video: {transcript.video_id}")

314

print(f"Language: {transcript.language}")

315

print(f"Total snippets: {len(transcript)}")

316

317

# Iterate through content

318

for i, snippet in enumerate(transcript):

319

end_time = snippet.start + snippet.duration

320

print(f"[{snippet.start:.2f}-{end_time:.2f}s] {snippet.text}")

321

322

if i >= 4: # Stop after the first 5 snippets (indices 0-4)

323

break

324

325

# Access specific snippets

326

first_snippet = transcript[0]

327

print(f"First snippet: '{first_snippet.text}' at {first_snippet.start}s")

328

329

# Convert to raw data for serialization

330

raw_data = transcript.to_raw_data()

331

print(f"Raw format: {raw_data[0]}") # {'text': '...', 'start': 0.0, 'duration': 3.84}

332

```

333

334

## Types

335

336

```python { .api }

337

from typing import List, Dict, Iterator, Iterable

338

from dataclasses import dataclass

339

340

# Internal translation language type

341

@dataclass

342

class _TranslationLanguage:

343

language: str

344

language_code: str

345

```