or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

downloaders.md extractors.md index.md main-downloader.md post-processors.md utilities.md

docs/utilities.md

0

# Utilities and Helpers

1

2

Comprehensive utility functions for text processing, network operations, date parsing, file handling, and cross-platform compatibility. These utilities are used internally by youtube-dl and are available for external use.

3

4

## Capabilities

5

6

### Text Processing and Sanitization

7

8

Functions for cleaning and processing text content, filenames, and URLs.

9

10

```python { .api }

11

def sanitize_filename(s, restricted=False, is_id=False):

12

"""

13

Sanitize filename for filesystem compatibility.

14

15

Parameters:

16

- s (str): Input filename string

17

- restricted (bool): Use ASCII-only characters

18

- is_id (bool): Whether string is a video ID

19

20

Returns:

21

str: Sanitized filename

22

"""

23

24

def sanitize_path(s):

25

"""

26

Sanitize complete file path.

27

28

Parameters:

29

- s (str): Input path string

30

31

Returns:

32

str: Sanitized path

33

"""

34

35

def sanitize_url(url):

36

"""

37

Sanitize URL for HTTP requests.

38

39

Parameters:

40

- url (str): Input URL

41

42

Returns:

43

str: Sanitized URL

44

"""

45

46

def clean_html(html):

47

"""

48

Clean HTML content by removing tags and entities.

49

50

Parameters:

51

- html (str): HTML content

52

53

Returns:

54

str: Plain text content

55

"""

56

57

def unescapeHTML(s):

58

"""

59

Unescape HTML entities in string.

60

61

Parameters:

62

- s (str): String with HTML entities

63

64

Returns:

65

str: Unescaped string

66

"""

67

```

68

69

### File and Data Processing

70

71

Functions for handling file operations, data encoding, and format conversion.

72

73

```python { .api }

74

def format_bytes(bytes):

75

"""

76

Format byte count as human-readable string.

77

78

Parameters:

79

- bytes (int): Byte count

80

81

Returns:

82

str: Formatted string using binary suffixes (e.g., '1.46MiB')

83

"""

84

85

def parse_filesize(s):

86

"""

87

Parse file size string into bytes.

88

89

Parameters:

90

- s (str): Size string (e.g., '1.5GB', '500MB')

91

92

Returns:

93

int: Size in bytes, or None if invalid

94

"""

95

96

def determine_ext(url, default_ext='unknown_video'):

97

"""

98

Determine file extension from URL.

99

100

Parameters:

101

- url (str): File URL

102

- default_ext (str): Default extension if undetermined

103

104

Returns:

105

str: File extension

106

"""

107

108

def encodeFilename(s, for_subprocess=False):

109

"""

110

Encode filename for filesystem operations.

111

112

Parameters:

113

- s (str): Filename string

114

- for_subprocess (bool): Encoding for subprocess calls

115

116

Returns:

117

bytes/str: Encoded filename

118

"""

119

120

def expand_path(s):

121

"""

122

Expand user home directory and environment variables in path.

123

124

Parameters:

125

- s (str): Path string

126

127

Returns:

128

str: Expanded path

129

"""

130

```

131

132

### Date and Time Processing

133

134

Functions for parsing and formatting dates and time durations.

135

136

```python { .api }

137

def unified_timestamp(date_str, day_first=True):

138

"""

139

Parse date string into Unix timestamp.

140

141

Parameters:

142

- date_str (str): Date string in various formats

143

- day_first (bool): Whether to interpret ambiguous dates as day-first

144

145

Returns:

146

int: Unix timestamp, or None if parsing fails

147

"""

148

149

def parse_iso8601(date_str, delimiter='T', colon=':'):

150

"""

151

Parse ISO 8601 date string.

152

153

Parameters:

154

- date_str (str): ISO 8601 date string

155

- delimiter (str): Date/time delimiter

156

- colon (str): Time component separator

157

158

Returns:

159

int: Unix timestamp

160

"""

161

162

def formatSeconds(secs, delim=':'):

163

"""

164

Format seconds as HH:MM:SS string.

165

166

Parameters:

167

- secs (int/float): Duration in seconds

168

- delim (str): Component delimiter

169

170

Returns:

171

str: Formatted duration string

172

"""

173

174

def parse_duration(s):

175

"""

176

Parse duration string into seconds.

177

178

Parameters:

179

- s (str): Duration string (e.g., '1:30', '90s', '1h30m')

180

181

Returns:

182

float: Duration in seconds, or None if parsing fails

183

"""

184

```

185

186

### Network and HTTP Utilities

187

188

Functions for handling HTTP requests, cookies, and network operations.

189

190

```python { .api }

191

def sanitized_Request(url, *args, **kwargs):

192

"""

193

Create sanitized HTTP request object.

194

195

Parameters:

196

- url (str): Request URL

197

- *args: Additional positional arguments

198

- **kwargs: Additional keyword arguments

199

200

Returns:

201

Request: HTTP request object

202

"""

203

204

def make_HTTPS_handler(params, **kwargs):

205

"""

206

Create HTTPS handler with custom SSL context.

207

208

Parameters:

209

- params (dict): SSL parameters

210

- **kwargs: Additional SSL options

211

212

Returns:

213

HTTPSHandler: HTTPS handler instance

214

"""

215

216

def std_headers():

217

"""

218

Get standard HTTP headers for requests.

219

220

Returns:

221

dict: Standard headers dictionary

222

"""

223

224

class YoutubeDLCookieJar:

225

"""

226

Custom cookie jar implementation for youtube-dl.

227

"""

228

229

def __init__(self, filename=None):

230

"""

231

Initialize cookie jar.

232

233

Parameters:

234

- filename (str): Cookie file path

235

"""

236

```

237

238

### Data Structure Utilities

239

240

Functions for working with data structures and collections.

241

242

```python { .api }

243

def orderedSet(iterable):

244

"""

245

Create ordered set from iterable (preserves insertion order).

246

247

Parameters:

248

- iterable: Input iterable

249

250

Returns:

251

list: List with unique elements in order

252

"""

253

254

def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):

255

"""

256

Convert value to integer or return None.

257

258

Parameters:

259

- v: Input value

260

- scale (int): Scaling factor

261

- default: Default value if conversion fails

262

- get_attr (str): Attribute to extract from object

263

- invscale (int): Inverse scaling factor

264

265

Returns:

266

int/None: Converted integer or None

267

"""

268

269

def float_or_none(v, scale=1, invscale=1, default=None):

270

"""

271

Convert value to float or return None.

272

273

Parameters:

274

- v: Input value

275

- scale (float): Scaling factor

276

- invscale (float): Inverse scaling factor

277

- default: Default value if conversion fails

278

279

Returns:

280

float/None: Converted float or None

281

"""

282

283

def str_or_none(v, default=None):

284

"""

285

Convert value to string or return None.

286

287

Parameters:

288

- v: Input value

289

- default: Default value if conversion fails

290

291

Returns:

292

str/None: Converted string or None

293

"""

294

```

295

296

### Cryptography and Security

297

298

Functions for encryption, decryption, and security operations.

299

300

```python { .api }

301

def aes_encrypt(data, key, iv):

302

"""

303

AES encryption function.

304

305

Parameters:

306

- data (bytes): Data to encrypt

307

- key (bytes): Encryption key

308

- iv (bytes): Initialization vector

309

310

Returns:

311

bytes: Encrypted data

312

"""

313

314

def aes_decrypt(data, key, iv):

315

"""

316

AES decryption function.

317

318

Parameters:

319

- data (bytes): Encrypted data

320

- key (bytes): Decryption key

321

- iv (bytes): Initialization vector

322

323

Returns:

324

bytes: Decrypted data

325

"""

326

327

def pkcs1pad(data, length):

328

"""

329

Apply PKCS#1 padding to data.

330

331

Parameters:

332

- data (bytes): Input data

333

- length (int): Target length

334

335

Returns:

336

bytes: Padded data

337

"""

338

```

339

340

### Platform Compatibility

341

342

Functions for handling cross-platform compatibility issues.

343

344

```python { .api }

345

def preferredencoding():

346

"""

347

Get preferred text encoding for current platform.

348

349

Returns:

350

str: Encoding name (e.g., 'utf-8', 'cp1252')

351

"""

352

353

def write_string(s, out=None, encoding=None):

354

"""

355

Write string to output stream with proper encoding.

356

357

Parameters:

358

- s (str): String to write

359

- out: Output stream (default: sys.stdout)

360

- encoding (str): Text encoding

361

"""

362

363

def get_subprocess_encoding():

364

"""

365

Get appropriate encoding for subprocess operations.

366

367

Returns:

368

str: Encoding name

369

"""

370

371

def args_to_str(args):

372

"""

373

Convert argument list to command string.

374

375

Parameters:

376

- args (list): Argument list

377

378

Returns:

379

str: Command string

380

"""

381

```

382

383

### Error Handling Classes

384

385

Exception classes for different error conditions.

386

387

```python { .api }

388

class YoutubeDLError(Exception):

389

"""Base class for youtube-dl errors."""

390

391

def __init__(self, msg=None):

392

"""

393

Initialize error with message.

394

395

Parameters:

396

- msg (str): Error message

397

"""

398

399

class ExtractorError(YoutubeDLError):

400

"""Error during information extraction."""

401

402

def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):

403

"""

404

Initialize extractor error.

405

406

Parameters:

407

- msg (str): Error message

408

- tb (str): Traceback information

409

- expected (bool): Whether error was expected

410

- cause (Exception): Underlying exception

411

- video_id (str): Video identifier

412

- ie (str): Info extractor name

413

"""

414

415

class DownloadError(YoutubeDLError):

416

"""Error during file download."""

417

418

class PostProcessingError(YoutubeDLError):

419

"""Error during post-processing."""

420

421

class UnavailableVideoError(YoutubeDLError):

422

"""Video is not available."""

423

424

class ContentTooShortError(YoutubeDLError):

425

"""Downloaded content is shorter than expected."""

426

427

class GeoRestrictedError(ExtractorError):

428

"""Content is geo-restricted."""

429

430

class MaxDownloadsReached(YoutubeDLError):

431

"""Maximum download limit reached."""

432

```

433

434

## Usage Examples

435

436

### Filename Sanitization

437

```python

438

from youtube_dl.utils import sanitize_filename

439

440

# Sanitize for cross-platform compatibility

441

safe_name = sanitize_filename("Video: Title with/special\\chars")

442

print(safe_name) # "Video - Title with_special_chars"

443

444

# ASCII-only mode

445

ascii_name = sanitize_filename("Vidéo: Título", restricted=True)

446

print(ascii_name) # "Video_-_Titulo"

447

```

448

449

### File Size Processing

450

```python

451

from youtube_dl.utils import format_bytes, parse_filesize

452

453

# Format bytes as human-readable

454

print(format_bytes(1536000)) # "1.46MiB"

455

456

# Parse size strings

457

size = parse_filesize("1.5GB")

458

print(size) # 1500000000 (decimal unit: "GB" = 1000**3; use "1.5GiB" for 1610612736)

459

```

460

461

### Date Processing

462

```python

463

from youtube_dl.utils import unified_timestamp, formatSeconds

464

465

# Parse various date formats

466

timestamp = unified_timestamp("2021-12-17T15:30:00Z")

467

print(timestamp) # 1639755000

468

469

# Format duration

470

duration_str = formatSeconds(3665)

471

print(duration_str) # "1:01:05"

472

```

473

474

### Data Type Conversion

475

```python

476

from youtube_dl.utils import int_or_none, float_or_none

477

478

# Safe integer conversion

479

width = int_or_none("1920") # 1920

480

invalid = int_or_none("invalid") # None

481

482

# Safe float conversion with scaling

483

bitrate = float_or_none("128", invscale=1000) # 128000.0 (non-numeric input such as "128k" returns None)

484

```