Tessl Tile for pypi/yt-dlp@2024.12.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

configuration.md core-download.md exceptions.md extractor-system.md index.md post-processing.md utilities.md

docs/utilities.md

0
# Utility Functions
1

2
Comprehensive utility functions for file handling, data parsing, URL processing, format conversion, and template processing that are commonly needed when working with media downloads and extraction.
3

4
## Capabilities
5

6
### File and Path Operations
7

8
Functions for sanitizing filenames, handling paths, and managing file system operations.
9

10
```python { .api }
11
def sanitize_filename(s, restricted=False, is_id=False):
12
    """
13
    Sanitize filename by removing/replacing invalid characters.
14
    
15
    Parameters:
16
    - s: str, filename to sanitize
17
    - restricted: bool, use ASCII-only characters
18
    - is_id: bool, treat as video ID (more permissive)
19
    
20
    Returns:
21
    str: sanitized filename safe for file system
22
    """
23

24
def sanitize_path(s, force=False):
25
    """
26
    Sanitize file path by cleaning path components.
27
    
28
    Parameters:
29
    - s: str, path to sanitize
30
    - force: bool, force sanitization even if path exists
31
    
32
    Returns:
33
    str: sanitized path
34
    """
35

36
def expand_path(s):
37
    """
38
    Expand user path with ~ notation and environment variables.
39
    
40
    Parameters:
41
    - s: str, path to expand
42
    
43
    Returns:
44
    str: expanded path
45
    """
46

47
def shell_quote(args, *, shell_quote_wrapper=None):
48
    """
49
    Quote arguments for safe shell execution.
50
    
51
    Parameters:
52
    - args: str|list[str], arguments to quote
53
    - shell_quote_wrapper: callable|None, custom quoting function
54
    
55
    Returns:
56
    str: quoted arguments string
57
    """
58
```
59

60
### Data Parsing and Conversion
61

62
Functions for parsing various data formats and safely converting between types.
63

64
```python { .api }
65
def parse_duration(s):
66
    """
67
    Parse duration string to seconds.
68
    
69
    Supports formats like '1:23:45', '1h23m45s', '3600', etc.
70
    
71
    Parameters:
72
    - s: str, duration string
73
    
74
    Returns:
75
    float|None: duration in seconds, None if parsing fails
76
    """
77

78
def parse_bytes(s):
79
    """
80
    Parse byte size string to integer.
81
    
82
    Supports formats like '1.5GB', '500MB', '1024KB', etc.
83
    
84
    Parameters:
85
    - s: str, byte size string
86
    
87
    Returns:
88
    int|None: size in bytes, None if parsing fails
89
    """
90

91
def parse_filesize(s):
92
    """
93
    Parse file size string to integer bytes.
94
    
95
    Parameters:
96
    - s: str, file size string
97
    
98
    Returns:
99
    int|None: size in bytes, None if parsing fails
100
    """
101

102
def parse_resolution(s, *, lenient=False):
103
    """
104
    Parse resolution string to width/height tuple.
105
    
106
    Parameters:
107
    - s: str, resolution string like '1920x1080'
108
    - lenient: bool, allow lenient parsing
109
    
110
    Returns:
111
    tuple[int, int]|None: (width, height) or None if parsing fails
112
    """
113

114
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
115
    """
116
    Safe integer conversion with scaling.
117
    
118
    Parameters:
119
    - v: Any, value to convert
120
    - scale: int, scaling factor
121
    - default: Any, default if conversion fails
122
    - get_attr: str|None, attribute to get from v
123
    - invscale: int, inverse scaling factor
124
    
125
    Returns:
126
    int|Any: converted integer or default
127
    """
128

129
def float_or_none(v, scale=1, invscale=1, default=None):
130
    """
131
    Safe float conversion with scaling.
132
    
133
    Parameters:
134
    - v: Any, value to convert
135
    - scale: float, scaling factor
136
    - invscale: float, inverse scaling factor
137
    - default: Any, default if conversion fails
138
    
139
    Returns:
140
    float|Any: converted float or default
141
    """
142

143
def str_or_none(v, default=None):
144
    """
145
    Safe string conversion.
146
    
147
    Parameters:
148
    - v: Any, value to convert
149
    - default: Any, default if conversion fails
150
    
151
    Returns:
152
    str|Any: converted string or default
153
    """
154
```
155

156
### Date and Time Utilities
157

158
Functions for parsing, formatting, and manipulating dates and timestamps.
159

160
```python { .api }
161
def unified_strdate(date_str, day_first=True):
162
    """
163
    Parse date string to unified YYYYMMDD format.
164
    
165
    Parameters:
166
    - date_str: str, date string in various formats
167
    - day_first: bool, assume day comes before month in ambiguous cases
168
    
169
    Returns:
170
    str|None: date in YYYYMMDD format, None if parsing fails
171
    """
172

173
def unified_timestamp(date_str, day_first=True):
174
    """
175
    Parse date string to Unix timestamp.
176
    
177
    Parameters:
178
    - date_str: str, date string in various formats
179
    - day_first: bool, assume day comes before month in ambiguous cases
180
    
181
    Returns:
182
    int|None: Unix timestamp, None if parsing fails
183
    """
184

185
def formatSeconds(secs, delim=':'):
186
    """
187
    Format seconds as duration string.
188
    
189
    Parameters:
190
    - secs: int|float, seconds to format
191
    - delim: str, delimiter between time components
192
    
193
    Returns:
194
    str: formatted duration (e.g., '1:23:45')
195
    """
196
```
197

198
### HTML and Web Processing
199

200
Functions for processing HTML content and extracting information from web pages.
201

202
```python { .api }
203
def clean_html(html):
204
    """
205
    Remove HTML tags and decode entities.
206
    
207
    Parameters:
208
    - html: str, HTML content to clean
209
    
210
    Returns:
211
    str: cleaned text content
212
    """
213

214
def unescapeHTML(s):
215
    """
216
    Decode HTML entities in string.
217
    
218
    Parameters:
219
    - s: str, string with HTML entities
220
    
221
    Returns:
222
    str: decoded string
223
    """
224

225
def extract_attributes(html_element):
226
    """
227
    Extract attributes from HTML element string.
228
    
229
    Parameters:
230
    - html_element: str, HTML element as string
231
    
232
    Returns:
233
    dict[str, str]: attribute name-value pairs
234
    """
235

236
def get_element_by_id(id, html, **kwargs):
237
    """
238
    Extract HTML element by ID.
239
    
240
    Parameters:
241
    - id: str, element ID to find
242
    - html: str, HTML content to search
243
    - **kwargs: additional options
244
    
245
    Returns:
246
    str|None: element content or None if not found
247
    """
248
```
249

250
### Network and URL Utilities
251

252
Functions for processing URLs, handling network operations, and managing web requests.
253

254
```python { .api }
255
def sanitize_url(url, *, scheme='http'):
256
    """
257
    Clean and sanitize URL.
258
    
259
    Parameters:
260
    - url: str, URL to sanitize
261
    - scheme: str, default scheme if missing
262
    
263
    Returns:
264
    str: sanitized URL
265
    """
266

267
def url_basename(url):
268
    """
269
    Get basename (filename) from URL.
270
    
271
    Parameters:
272
    - url: str, URL to extract basename from
273
    
274
    Returns:
275
    str: basename of URL
276
    """
277

278
def urljoin(base, path):
279
    """
280
    Join base URL with path.
281
    
282
    Parameters:
283
    - base: str, base URL
284
    - path: str, path to join
285
    
286
    Returns:
287
    str: joined URL
288
    """
289

290
def smuggle_url(url, data):
291
    """
292
    Encode data into URL for internal passing.
293
    
294
    Parameters:
295
    - url: str, base URL
296
    - data: dict, data to encode
297
    
298
    Returns:
299
    str: URL with smuggled data
300
    """
301

302
def unsmuggle_url(smug_url, default=None):
303
    """
304
    Extract smuggled data from URL.
305
    
306
    Parameters:
307
    - smug_url: str, URL with smuggled data
308
    - default: Any, default if no data found
309
    
310
    Returns:
311
    tuple[str, Any]: (clean_url, extracted_data)
312
    """
313
```
314

315
### Format and Output Utilities
316

317
Functions for formatting data for display and managing output streams.
318

319
```python { .api }
320
def format_bytes(bytes):
321
    """
322
    Format byte count for human-readable display.
323
    
324
    Parameters:
325
    - bytes: int, byte count
326
    
327
    Returns:
328
    str: formatted byte string (e.g., '1.5 GB')
329
    """
330

331
def render_table(headers, rows, delim=' ', extra_gap=0, hide_empty=False):
332
    """
333
    Create formatted table string.
334
    
335
    Parameters:
336
    - headers: list[str], column headers
337
    - rows: list[list[str]], table rows
338
    - delim: str, column delimiter
339
    - extra_gap: int, extra spacing between columns
340
    - hide_empty: bool, hide empty columns
341
    
342
    Returns:
343
    str: formatted table
344
    """
345

346
def write_string(s, out=None, encoding=None):
347
    """
348
    Write string to output stream with proper encoding.
349
    
350
    Parameters:
351
    - s: str, string to write
352
    - out: file-like|None, output stream (default: stdout)
353
    - encoding: str|None, encoding to use
354
    """
355

356
def traverse_obj(obj, *paths, **kwargs):
357
    """
358
    Safely navigate nested objects with multiple path options.
359
    
360
    Parameters:
361
    - obj: Any, object to traverse
362
    - *paths: path specifications (strings, tuples, callables)
363
    - **kwargs: options like 'default', 'expected_type', etc.
364
    
365
    Returns:
366
    Any: value at path or default
367
    """
368
```
369

370
### Template and String Processing
371

372
Functions for processing output templates and manipulating strings.
373

374
```python { .api }
375
class FormatSorter:
376
    """
377
    Advanced format sorting with customizable criteria.
378
    
379
    Provides sophisticated format selection based on quality,
380
    codec preferences, file size, and other criteria.
381
    """
382
    
383
    def __init__(self, *args, **kwargs):
384
        """Initialize format sorter with criteria."""
385
    
386
    def evaluate(self, format_list):
387
        """
388
        Sort formats according to criteria.
389
        
390
        Parameters:
391
        - format_list: list[dict], formats to sort
392
        
393
        Returns:
394
        list[dict]: sorted formats
395
        """
396

397
def match_filter_func(filters, breaking_filters):
398
    """
399
    Create match filter function from filter expressions.
400
    
401
    Parameters:
402
    - filters: list[str], filter expressions
403
    - breaking_filters: list[str], breaking filter expressions
404
    
405
    Returns:
406
    callable: filter function
407
    """
408
```
409

410
## Usage Examples
411

412
### Filename Sanitization
413

414
```python
415
from yt_dlp.utils import sanitize_filename
416

417
# Basic sanitization
418
unsafe_name = "My Video: Part 1 (2024) <HD>.mp4"
419
safe_name = sanitize_filename(unsafe_name)
420
print(f"Safe filename: {safe_name}")
421
# Output: Safe filename: My Video： Part 1 (2024) ＜HD＞.mp4  (forbidden characters become full-width look-alikes)
422

423
# Restricted ASCII-only sanitization
424
restricted_name = sanitize_filename(unsafe_name, restricted=True)
425
print(f"Restricted filename: {restricted_name}")
426
# Output: Restricted filename: My_Video_-_Part_1_2024_HD_.mp4
427
```
428

429
### Duration Parsing
430

431
```python
432
from yt_dlp.utils import parse_duration, formatSeconds
433

434
# Parse various duration formats
435
durations = ['1:23:45', '3600', '1h23m45s', '5003.7']
436
for duration_str in durations:
437
    seconds = parse_duration(duration_str)
438
    formatted = formatSeconds(seconds) if seconds else 'Invalid'
439
    print(f"{duration_str} -> {seconds}s -> {formatted}")
440
```
441

442
### Data Size Parsing
443

444
```python
445
from yt_dlp.utils import parse_bytes, format_bytes
446

447
# Parse file sizes
448
sizes = ['1.5GB', '500MB', '1024KB', '2048']
449
for size_str in sizes:
450
    bytes_count = parse_bytes(size_str)
451
    formatted = format_bytes(bytes_count) if bytes_count else 'Invalid'
452
    print(f"{size_str} -> {bytes_count} bytes -> {formatted}")
453
```
454

455
### Date Processing
456

457
```python
458
from yt_dlp.utils import unified_strdate, unified_timestamp
459
import datetime
460

461
# Parse dates
462
dates = ['2024-01-15', 'Jan 15, 2024', '15/01/2024']
463
for date_str in dates:
464
    unified = unified_strdate(date_str)
465
    timestamp = unified_timestamp(date_str)
466
    if timestamp:
467
        readable = datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')
468
        print(f"{date_str} -> {unified} -> {timestamp} -> {readable}")
469
```
470

471
### HTML Processing
472

473
```python
474
from yt_dlp.utils import clean_html, unescapeHTML
475

476
html_content = "&lt;p&gt;Video title with &amp;quot;quotes&amp;quot;&lt;/p&gt;"
477
decoded = unescapeHTML(html_content)
478
clean_text = clean_html(decoded)
479
print(f"Original: {html_content}")
480
print(f"Decoded: {decoded}")
481
print(f"Clean: {clean_text}")
482
```
483

484
### Safe Object Traversal
485

486
```python
487
from yt_dlp.utils import traverse_obj
488

489
# Complex nested data
490
data = {
491
    'video': {
492
        'metadata': {
493
            'title': 'Example Video',
494
            'stats': {'views': 1000000}
495
        },
496
        'formats': [
497
            {'quality': 'high', 'url': 'https://example.com/high.mp4'},
498
            {'quality': 'low', 'url': 'https://example.com/low.mp4'}
499
        ]
500
    }
501
}
502

503
# Safely extract nested values
504
title = traverse_obj(data, ('video', 'metadata', 'title'))
505
views = traverse_obj(data, ('video', 'metadata', 'stats', 'views'))
506
first_url = traverse_obj(data, ('video', 'formats', 0, 'url'))
507
missing = traverse_obj(data, ('video', 'missing', 'field'), default='Not found')
508

509
print(f"Title: {title}")
510
print(f"Views: {views}")
511
print(f"First URL: {first_url}")
512
print(f"Missing field: {missing}")
513
```
514

515
### Table Formatting
516

517
```python
518
from yt_dlp.utils import render_table
519

520
headers = ['Format', 'Quality', 'Size', 'Codec']
521
rows = [
522
    ['mp4', '1080p', '500MB', 'h264'],
523
    ['webm', '720p', '300MB', 'vp9'],
524
    ['mp4', '480p', '150MB', 'h264'],
525
]
526

527
table = render_table(headers, rows, delim=' | ', extra_gap=1)
528
print(table)
529
```
530

531
## Types
532

533
```python { .api }
534
# Date range class for filtering by date
535
class DateRange:
536
    def __init__(self, start=None, end=None): ...
537
    def day(cls, day): ...  # classmethod: create a range covering only the given day
538
    
539
# Configuration management class
540
class Config:
541
    def __init__(self): ...
542
    
543
# Format sorting and preference class
544
class FormatSorter:
545
    def __init__(self, extractor, field_preference=None): ...
546
    
547
# Configuration namespace class
548
class Namespace:
549
    def __init__(self, **kwargs): ...
550

551
# Lazy list implementation for memory efficiency
552
class LazyList:
553
    def __init__(self, iterable): ...
554

555
# Paged list for handling large datasets
556
class PagedList:
557
    def __init__(self, pagefunc, pagesize): ...
558

559
# Playlist entry parser
560
class PlaylistEntries:
561
    @staticmethod
562
    def parse_playlist_items(spec): ...
563

564
# Geographic utilities
565
class GeoUtils:
566
    @staticmethod
567
    def random_ipv4(code): ...
568

569
# ISO country code utilities
570
class ISO3166Utils:
571
    @staticmethod
572
    def short2full(code): ...
573

574
# Sentinel object for no default value
575
NO_DEFAULT = object()
576
```

Version

Tile

Files

docs/utilities.md

docs/utilities.md