or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md · core-download.md · exceptions.md · extractor-system.md · index.md · post-processing.md · utilities.md

docs/utilities.md

0

# Utility Functions

1

2

Comprehensive utility functions for file handling, data parsing, URL processing, format conversion, and template processing commonly needed when working with media downloads and extraction operations.

3

4

## Capabilities

5

6

### File and Path Operations

7

8

Functions for sanitizing filenames, handling paths, and managing file system operations.

9

10

```python { .api }

11

def sanitize_filename(s, restricted=False, is_id=False):

12

"""

13

Sanitize filename by removing/replacing invalid characters.

14

15

Parameters:

16

- s: str, filename to sanitize

17

- restricted: bool, use ASCII-only characters

18

- is_id: bool, treat as video ID (more permissive)

19

20

Returns:

21

str: sanitized filename safe for file system

22

"""

23

24

def sanitize_path(s, force=False):

25

"""

26

Sanitize file path by cleaning path components.

27

28

Parameters:

29

- s: str, path to sanitize

30

- force: bool, force sanitization even if path exists

31

32

Returns:

33

str: sanitized path

34

"""

35

36

def expand_path(s):

37

"""

38

Expand user path with ~ notation and environment variables.

39

40

Parameters:

41

- s: str, path to expand

42

43

Returns:

44

str: expanded path

45

"""

46

47

def shell_quote(args, *, shell_quote_wrapper=None):

48

"""

49

Quote arguments for safe shell execution.

50

51

Parameters:

52

- args: str|list[str], arguments to quote

53

- shell_quote_wrapper: callable|None, custom quoting function

54

55

Returns:

56

str: quoted arguments string

57

"""

58

```

59

60

### Data Parsing and Conversion

61

62

Functions for parsing various data formats and safely converting between types.

63

64

```python { .api }

65

def parse_duration(s):

66

"""

67

Parse duration string to seconds.

68

69

Supports formats like '1:23:45', '1h23m45s', '3600', etc.

70

71

Parameters:

72

- s: str, duration string

73

74

Returns:

75

float|None: duration in seconds (may be fractional, e.g. '5003.7'), None if parsing fails

76

"""

77

78

def parse_bytes(s):

79

"""

80

Parse byte size string to integer.

81

82

Supports formats like '1.5GB', '500MB', '1024KB', etc.

83

84

Parameters:

85

- s: str, byte size string

86

87

Returns:

88

int|None: size in bytes, None if parsing fails

89

"""

90

91

def parse_filesize(s):

92

"""

93

Parse file size string to integer bytes.

94

95

Parameters:

96

- s: str, file size string

97

98

Returns:

99

int|None: size in bytes, None if parsing fails

100

"""

101

102

def parse_resolution(s, *, lenient=False):

103

"""

104

Parse resolution string to width/height tuple.

105

106

Parameters:

107

- s: str, resolution string like '1920x1080'

108

- lenient: bool, allow lenient parsing

109

110

Returns:

111

tuple[int, int]|None: (width, height) or None if parsing fails

112

"""

113

114

def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):

115

"""

116

Safe integer conversion with scaling.

117

118

Parameters:

119

- v: Any, value to convert

120

- scale: int, scaling factor

121

- default: Any, default if conversion fails

122

- get_attr: str|None, attribute to get from v

123

- invscale: int, inverse scaling factor

124

125

Returns:

126

int|Any: converted integer or default

127

"""

128

129

def float_or_none(v, scale=1, invscale=1, default=None):

130

"""

131

Safe float conversion with scaling.

132

133

Parameters:

134

- v: Any, value to convert

135

- scale: float, scaling factor

136

- invscale: float, inverse scaling factor

137

- default: Any, default if conversion fails

138

139

Returns:

140

float|Any: converted float or default

141

"""

142

143

def str_or_none(v, default=None):

144

"""

145

Safe string conversion.

146

147

Parameters:

148

- v: Any, value to convert

149

- default: Any, default if conversion fails

150

151

Returns:

152

str|Any: converted string or default

153

"""

154

```

155

156

### Date and Time Utilities

157

158

Functions for parsing, formatting, and manipulating dates and timestamps.

159

160

```python { .api }

161

def unified_strdate(date_str, day_first=True):

162

"""

163

Parse date string to unified YYYYMMDD format.

164

165

Parameters:

166

- date_str: str, date string in various formats

167

- day_first: bool, assume day comes before month in ambiguous cases

168

169

Returns:

170

str|None: date in YYYYMMDD format, None if parsing fails

171

"""

172

173

def unified_timestamp(date_str, day_first=True):

174

"""

175

Parse date string to Unix timestamp.

176

177

Parameters:

178

- date_str: str, date string in various formats

179

- day_first: bool, assume day comes before month in ambiguous cases

180

181

Returns:

182

int|None: Unix timestamp, None if parsing fails

183

"""

184

185

def formatSeconds(secs, delim=':'):

186

"""

187

Format seconds as duration string.

188

189

Parameters:

190

- secs: int|float, seconds to format

191

- delim: str, delimiter between time components

192

193

Returns:

194

str: formatted duration (e.g., '1:23:45')

195

"""

196

```

197

198

### HTML and Web Processing

199

200

Functions for processing HTML content and extracting information from web pages.

201

202

```python { .api }

203

def clean_html(html):

204

"""

205

Remove HTML tags and decode entities.

206

207

Parameters:

208

- html: str, HTML content to clean

209

210

Returns:

211

str: cleaned text content

212

"""

213

214

def unescapeHTML(s):

215

"""

216

Decode HTML entities in string.

217

218

Parameters:

219

- s: str, string with HTML entities

220

221

Returns:

222

str: decoded string

223

"""

224

225

def extract_attributes(html_element):

226

"""

227

Extract attributes from HTML element string.

228

229

Parameters:

230

- html_element: str, HTML element as string

231

232

Returns:

233

dict[str, str]: attribute name-value pairs

234

"""

235

236

def get_element_by_id(id, html, **kwargs):

237

"""

238

Extract HTML element by ID.

239

240

Parameters:

241

- id: str, element ID to find

242

- html: str, HTML content to search

243

- **kwargs: additional options

244

245

Returns:

246

str|None: element content or None if not found

247

"""

248

```

249

250

### Network and URL Utilities

251

252

Functions for processing URLs, handling network operations, and managing web requests.

253

254

```python { .api }

255

def sanitize_url(url, *, scheme='http'):

256

"""

257

Clean and sanitize URL.

258

259

Parameters:

260

- url: str, URL to sanitize

261

- scheme: str, default scheme if missing

262

263

Returns:

264

str: sanitized URL

265

"""

266

267

def url_basename(url):

268

"""

269

Get basename (filename) from URL.

270

271

Parameters:

272

- url: str, URL to extract basename from

273

274

Returns:

275

str: basename of URL

276

"""

277

278

def urljoin(base, path):

279

"""

280

Join base URL with path.

281

282

Parameters:

283

- base: str, base URL

284

- path: str, path to join

285

286

Returns:

287

str: joined URL

288

"""

289

290

def smuggle_url(url, data):

291

"""

292

Encode data into URL for internal passing.

293

294

Parameters:

295

- url: str, base URL

296

- data: dict, data to encode

297

298

Returns:

299

str: URL with smuggled data

300

"""

301

302

def unsmuggle_url(smug_url, default=None):

303

"""

304

Extract smuggled data from URL.

305

306

Parameters:

307

- smug_url: str, URL with smuggled data

308

- default: Any, default if no data found

309

310

Returns:

311

tuple[str, Any]: (clean_url, extracted_data)

312

"""

313

```

314

315

### Format and Output Utilities

316

317

Functions for formatting data for display and managing output streams.

318

319

```python { .api }

320

def format_bytes(bytes):

321

"""

322

Format byte count for human-readable display.

323

324

Parameters:

325

- bytes: int, byte count

326

327

Returns:

328

str: formatted byte string (e.g., '1.5 GB')

329

"""

330

331

def render_table(headers, rows, delim=' ', extra_gap=0, hide_empty=False):

332

"""

333

Create formatted table string.

334

335

Parameters:

336

- headers: list[str], column headers

337

- rows: list[list[str]], table rows

338

- delim: str, column delimiter

339

- extra_gap: int, extra spacing between columns

340

- hide_empty: bool, hide empty columns

341

342

Returns:

343

str: formatted table

344

"""

345

346

def write_string(s, out=None, encoding=None):

347

"""

348

Write string to output stream with proper encoding.

349

350

Parameters:

351

- s: str, string to write

352

- out: file-like|None, output stream (default: stdout)

353

- encoding: str|None, encoding to use

354

"""

355

356

def traverse_obj(obj, *paths, **kwargs):

357

"""

358

Safely navigate nested objects with multiple path options.

359

360

Parameters:

361

- obj: Any, object to traverse

362

- *paths: path specifications (strings, tuples, callables)

363

- **kwargs: options like 'default', 'expected_type', etc.

364

365

Returns:

366

Any: value at path or default

367

"""

368

```

369

370

### Template and String Processing

371

372

Classes and functions for sorting formats by configurable criteria and for building match-filter functions from filter expressions.

373

374

```python { .api }

375

class FormatSorter:

376

"""

377

Advanced format sorting with customizable criteria.

378

379

Provides sophisticated format selection based on quality,

380

codec preferences, file size, and other criteria.

381

"""

382

383

def __init__(self, *args, **kwargs):

384

"""Initialize format sorter with criteria."""

385

386

def evaluate(self, format_list):

387

"""

388

Sort formats according to criteria.

389

390

Parameters:

391

- format_list: list[dict], formats to sort

392

393

Returns:

394

list[dict]: sorted formats

395

"""

396

397

def match_filter_func(filters, breaking_filters):

398

"""

399

Create match filter function from filter expressions.

400

401

Parameters:

402

- filters: list[str], filter expressions

403

- breaking_filters: list[str], breaking filter expressions

404

405

Returns:

406

callable: filter function

407

"""

408

```

409

410

## Usage Examples

411

412

### Filename Sanitization

413

414

```python

415

from yt_dlp.utils import sanitize_filename

416

417

# Basic sanitization

418

unsafe_name = "My Video: Part 1 (2024) <HD>.mp4"

419

safe_name = sanitize_filename(unsafe_name)

420

print(f"Safe filename: {safe_name}")

421

# Output: My Video꞉ Part 1 (2024) ⧸HD⧹.mp4

422

423

# Restricted ASCII-only sanitization

424

restricted_name = sanitize_filename(unsafe_name, restricted=True)

425

print(f"Restricted filename: {restricted_name}")

426

# Output: My_Video_Part_1_2024_HD.mp4

427

```

428

429

### Duration Parsing

430

431

```python

432

from yt_dlp.utils import parse_duration, formatSeconds

433

434

# Parse various duration formats

435

durations = ['1:23:45', '3600', '1h23m45s', '5003.7']

436

for duration_str in durations:

437

seconds = parse_duration(duration_str)

438

formatted = formatSeconds(seconds) if seconds else 'Invalid'

439

print(f"{duration_str} -> {seconds}s -> {formatted}")

440

```

441

442

### Data Size Parsing

443

444

```python

445

from yt_dlp.utils import parse_bytes, format_bytes

446

447

# Parse file sizes

448

sizes = ['1.5GB', '500MB', '1024KB', '2048']

449

for size_str in sizes:

450

bytes_count = parse_bytes(size_str)

451

formatted = format_bytes(bytes_count) if bytes_count else 'Invalid'

452

print(f"{size_str} -> {bytes_count} bytes -> {formatted}")

453

```

454

455

### Date Processing

456

457

```python

458

from yt_dlp.utils import unified_strdate, unified_timestamp

459

import datetime

460

461

# Parse dates

462

dates = ['2024-01-15', 'Jan 15, 2024', '15/01/2024']

463

for date_str in dates:

464

unified = unified_strdate(date_str)

465

timestamp = unified_timestamp(date_str)

466

if timestamp:

467

readable = datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')

468

print(f"{date_str} -> {unified} -> {timestamp} -> {readable}")

469

```

470

471

### HTML Processing

472

473

```python

474

from yt_dlp.utils import clean_html, unescapeHTML

475

476

html_content = "&lt;p&gt;Video title with &amp;quot;quotes&amp;quot;&lt;/p&gt;"

477

decoded = unescapeHTML(html_content)

478

clean_text = clean_html(decoded)

479

print(f"Original: {html_content}")

480

print(f"Decoded: {decoded}")

481

print(f"Clean: {clean_text}")

482

```

483

484

### Safe Object Traversal

485

486

```python

487

from yt_dlp.utils import traverse_obj

488

489

# Complex nested data

490

data = {

491

'video': {

492

'metadata': {

493

'title': 'Example Video',

494

'stats': {'views': 1000000}

495

},

496

'formats': [

497

{'quality': 'high', 'url': 'https://example.com/high.mp4'},

498

{'quality': 'low', 'url': 'https://example.com/low.mp4'}

499

]

500

}

501

}

502

503

# Safely extract nested values

504

title = traverse_obj(data, ('video', 'metadata', 'title'))

505

views = traverse_obj(data, ('video', 'metadata', 'stats', 'views'))

506

first_url = traverse_obj(data, ('video', 'formats', 0, 'url'))

507

missing = traverse_obj(data, ('video', 'missing', 'field'), default='Not found')

508

509

print(f"Title: {title}")

510

print(f"Views: {views}")

511

print(f"First URL: {first_url}")

512

print(f"Missing field: {missing}")

513

```

514

515

### Table Formatting

516

517

```python

518

from yt_dlp.utils import render_table

519

520

headers = ['Format', 'Quality', 'Size', 'Codec']

521

rows = [

522

['mp4', '1080p', '500MB', 'h264'],

523

['webm', '720p', '300MB', 'vp9'],

524

['mp4', '480p', '150MB', 'h264'],

525

]

526

527

table = render_table(headers, rows, delim=' | ', extra_gap=1)

528

print(table)

529

```

530

531

## Types

532

533

```python { .api }

534

# Date range class for filtering by date

535

class DateRange:

536

def __init__(self, start=None, end=None): ...

537

def day(cls, day): ... # Create single-day range

538

539

# Configuration management class

540

class Config:

541

def __init__(self): ...

542

543

# Format sorting and preference class

544

class FormatSorter:

545

def __init__(self, extractor, field_preference=None): ...

546

547

# Configuration namespace class

548

class Namespace:

549

def __init__(self, **kwargs): ...

550

551

# Lazy list implementation for memory efficiency

552

class LazyList:

553

def __init__(self, iterable): ...

554

555

# Paged list for handling large datasets

556

class PagedList:

557

def __init__(self, pagefunc, pagesize): ...

558

559

# Playlist entry parser

560

class PlaylistEntries:

561

@staticmethod

562

def parse_playlist_items(spec): ...

563

564

# Geographic utilities

565

class GeoUtils:

566

@staticmethod

567

def random_ipv4(code): ...

568

569

# ISO country code utilities

570

class ISO3166Utils:

571

@staticmethod

572

def short2full(code): ...

573

574

# Sentinel object for no default value

575

NO_DEFAULT = object()

576

```