or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-validation.md, index.md, options-configuration.md, plugin-system.md, session-management.md, stream-access.md, utilities.md

docs/data-validation.md

0

# Data Validation

1

2

Streamlink provides a comprehensive schema-based validation system for parsing HTML, JSON, and XML, and for validating data structures. The validation system uses combinators and type-specific validators to ensure data integrity during plugin development.

3

4

## Capabilities

5

6

### Core Validation Function

7

8

The main validation function that applies schemas to data objects.

9

10

```python { .api }

11

def validate(obj, schema):

12

"""

13

Validate an object against a schema.

14

15

Parameters:

16

- obj: Object to validate

17

- schema: Validation schema (function, type, or validator)

18

19

Returns:

20

Validated and potentially transformed object

21

22

Raises:

23

ValidationError: If validation fails

24

"""

25

26

Schema = callable # Schema type alias

27

```

28

29

### Schema Combinators

30

31

Functions that combine or wrap schemas for complex validation logic, along with the `regex` and `xml_element` validators.

32

33

```python { .api }

34

def all(*schemas):

35

"""

36

All schemas must pass validation.

37

38

Parameters:

39

- *schemas: Schemas to apply in sequence

40

41

Returns:

42

Result of the last schema

43

"""

44

45

def any(*schemas):

46

"""

47

At least one schema must pass validation.

48

49

Parameters:

50

- *schemas: Schemas to try in order

51

52

Returns:

53

Result of the first successful schema

54

"""

55

56

def none_or_all(*schemas):

57

"""

58

Pass None through unchanged; otherwise all schemas must pass.

59

60

Parameters:

61

- *schemas: Schemas to apply if object is not None

62

63

Returns:

64

None if input is None, otherwise result of all(*schemas)

65

"""

66

67

def optional(schema):

68

"""

69

Schema is optional - passes None through unchanged.

70

71

Parameters:

72

- schema: Schema to apply if object is not None

73

74

Returns:

75

None if input is None, otherwise result of schema

76

"""

77

78

def transform(func, *schemas):

79

"""

80

Transform object with function then apply schemas.

81

82

Parameters:

83

- func: Transformation function

84

- *schemas: Schemas to apply to transformed object

85

86

Returns:

87

Result of applying schemas to func(obj)

88

"""

89

90

def list(schema):

91

"""

92

Validate each element in a list.

93

94

Parameters:

95

- schema: Schema to apply to each list element

96

97

Returns:

98

List with each element validated by schema

99

"""

100

101

def union(*schemas):

102

"""

103

Union of schemas - first successful schema wins.

104

105

Parameters:

106

- *schemas: Schemas to try in order

107

108

Returns:

109

Result of first successful schema

110

"""

111

112

def union_get(*schemas):

113

"""

114

Union schemas with get operations.

115

116

Parameters:

117

- *schemas: Get schemas to try in order

118

119

Returns:

120

Result of first successful get schema

121

"""

122

123

def regex(pattern, **kwargs):

124

"""

125

Validate string against regular expression.

126

127

Parameters:

128

- pattern: Regular expression pattern

129

- **kwargs: Additional regex options

130

131

Returns:

132

Match groups or original string

133

"""

134

135

def xml_element(tag=None, **kwargs):

136

"""

137

Validate XML element structure.

138

139

Parameters:

140

- tag: Expected tag name (optional)

141

- **kwargs: Additional validation options

142

143

Returns:

144

Validated XML element

145

"""

146

```

147

148

### Data Access Validators

149

150

Validators for extracting and validating data from nested structures.

151

152

```python { .api }

153

def attr(attr, schema, default=None):

154

"""

155

Validate object attribute.

156

157

Parameters:

158

- attr: Attribute name to extract

159

- schema: Schema to apply to attribute value

160

- default: Default value if attribute missing

161

162

Returns:

163

Validated attribute value

164

"""

165

166

def get(item, schema, default=None):

167

"""

168

Validate dictionary/list item.

169

170

Parameters:

171

- item: Key/index to extract

172

- schema: Schema to apply to item value

173

- default: Default value if item missing

174

175

Returns:

176

Validated item value

177

"""

178

```

179

180

### String Validators

181

182

Validators for string content and format checking, plus general-purpose validators for length, attribute access, filtering, and mapping.

183

184

```python { .api }

185

def contains(item):

186

"""

187

Check if string contains specified item.

188

189

Parameters:

190

- item: Substring to search for

191

192

Returns:

193

Original string if contains item

194

"""

195

196

def startswith(prefix):

197

"""

198

Check if string starts with prefix.

199

200

Parameters:

201

- prefix: Required string prefix

202

203

Returns:

204

Original string if starts with prefix

205

"""

206

207

def endswith(suffix):

208

"""

209

Check if string ends with suffix.

210

211

Parameters:

212

- suffix: Required string suffix

213

214

Returns:

215

Original string if ends with suffix

216

"""

217

218

def length(min_len, max_len=None):

219

"""

220

Validate string or collection length.

221

222

Parameters:

223

- min_len: Minimum required length

224

- max_len: Maximum allowed length (optional)

225

226

Returns:

227

Original object if length is valid

228

"""

229

230

def getattr(attr, default=None):

231

"""

232

Get object attribute for validation.

233

234

Parameters:

235

- attr: Attribute name to extract

236

- default: Default value if attribute missing

237

238

Returns:

239

Attribute value

240

"""

241

242

def hasattr(attr):

243

"""

244

Check if object has specified attribute.

245

246

Parameters:

247

- attr: Attribute name to check

248

249

Returns:

250

Original object if attribute exists

251

"""

252

253

def filter(func):

254

"""

255

Filter collection elements using function.

256

257

Parameters:

258

- func: Filter function (returns bool)

259

260

Returns:

261

Filtered collection

262

"""

263

264

def map(func):

265

"""

266

Apply function to each element in collection.

267

268

Parameters:

269

- func: Mapping function

270

271

Returns:

272

Collection with mapped elements

273

"""

274

```

275

276

### Parsing Validators

277

278

Validators that parse and validate different data formats.

279

280

```python { .api }

281

def url(**kwargs):

282

"""

283

Validate and parse URLs.

284

285

Parameters:

286

- **kwargs: Additional validation options

287

288

Returns:

289

Parsed and validated URL

290

"""

291

292

def parse_html(**kwargs):

293

"""

294

Parse HTML content using lxml.

295

296

Parameters:

297

- **kwargs: lxml.html parsing options

298

299

Returns:

300

Parsed HTML element tree

301

"""

302

303

def parse_json(**kwargs):

304

"""

305

Parse JSON data.

306

307

Parameters:

308

- **kwargs: json.loads() options

309

310

Returns:

311

Parsed JSON object

312

"""

313

314

def parse_xml(**kwargs):

315

"""

316

Parse XML content using lxml.

317

318

Parameters:

319

- **kwargs: lxml.etree parsing options

320

321

Returns:

322

Parsed XML element tree

323

"""

324

```

325

326

### XML-Specific Validators

327

328

Specialized validators for XML content extraction. The query-string parser `parse_qsd` is also listed here.

329

330

```python { .api }

331

def xml_find(tag):

332

"""

333

Find first XML element matching tag.

334

335

Parameters:

336

- tag: XPath expression or tag name

337

338

Returns:

339

First matching XML element

340

"""

341

342

def xml_findall(tag):

343

"""

344

Find all XML elements matching tag.

345

346

Parameters:

347

- tag: XPath expression or tag name

348

349

Returns:

350

List of matching XML elements

351

"""

352

353

def xml_findtext(tag):

354

"""

355

Extract text content from XML element.

356

357

Parameters:

358

- tag: XPath expression or tag name

359

360

Returns:

361

Text content of first matching element

362

"""

363

364

def xml_xpath(expression):

365

"""

366

Execute XPath expression on XML element.

367

368

Parameters:

369

- expression: XPath expression string

370

371

Returns:

372

XPath query results

373

"""

374

375

def xml_xpath_string(expression):

376

"""

377

Execute XPath expression and return string result.

378

379

Parameters:

380

- expression: XPath expression string

381

382

Returns:

383

String result of XPath query

384

"""

385

386

def parse_qsd(**kwargs):

387

"""

388

Parse query string data.

389

390

Parameters:

391

- **kwargs: Query string parsing options

392

393

Returns:

394

Parsed query string dictionary

395

"""

396

```

397

398

## Usage Examples

399

400

### Basic Validation

401

402

```python

403

from streamlink.validate import validate, all, optional, length, startswith

404

405

# Simple type validation

406

validate("hello", str) # Returns "hello"

407

validate(42, int) # Returns 42

408

409

# Schema combinations

410

schema = all(str, length(1, 100))

411

validate("valid string", schema) # Success

412

413

# Optional validation

414

optional_schema = optional(all(str, startswith("http")))

415

validate(None, optional_schema) # Returns None

416

validate("https://example.com", optional_schema) # Success

417

```

418

419

### HTML Parsing and Validation

420

421

```python

422

from streamlink.validate import validate, all, attr, parse_html, url, xml_find, xml_findtext

423

424

html_content = """

425

<html>

426

<body>

427

<div class="video-container">

428

<video src="https://example.com/video.mp4" />

429

<div class="title">Video Title</div>

430

</div>

431

</body>

432

</html>

433

"""

434

435

# Parse HTML and extract video URL

436

schema = all(

437

parse_html(),

438

xml_find(".//video"),

439

attr("src", all(str, url()))

440

)

441

442

video_url = validate(html_content, schema)

443

print(video_url) # "https://example.com/video.mp4"

444

445

# Extract title text

446

title_schema = all(

447

parse_html(),

448

xml_findtext(".//div[@class='title']"),

449

str

450

)

451

452

title = validate(html_content, title_schema)

453

print(title) # "Video Title"

454

```

455

456

### JSON Data Validation

457

458

```python

459

from streamlink.validate import validate, all, parse_json, get, list, url

460

461

json_data = '''

462

{

463

"streams": [

464

{"quality": "720p", "url": "https://example.com/720p.m3u8"},

465

{"quality": "1080p", "url": "https://example.com/1080p.m3u8"}

466

],

467

"title": "Stream Title"

468

}

469

'''

470

471

# Validate stream data structure

472

stream_schema = all(

473

parse_json(),

474

get("streams", list(all(

475

dict,

476

get("quality", str),

477

get("url", all(str, url()))

478

)))

479

)

480

481

streams = validate(json_data, stream_schema)

482

for stream in streams:

483

print(f"{stream['quality']}: {stream['url']}")

484

```

485

486

### Complex Plugin Validation

487

488

```python

489

from streamlink.validate import *

490

491

class ExamplePlugin(Plugin):

492

def _extract_streams(self):

493

# Fetch webpage

494

res = self.session.http.get(self.url)

495

496

# Validate and extract stream data

497

schema = all(

498

parse_html(),

499

xml_find(".//script[contains(text(), 'videoData')]"),

500

attr("text", all(

501

str,

502

# Extract JSON from script tag

503

transform(lambda x: x.split('videoData = ')[1].split(';')[0], str),

504

parse_json(),

505

# Validate JSON structure

506

all(

507

dict,

508

get("streams", list(all(

509

dict,

510

get("format", str),

511

get("url", all(str, url())),

512

get("quality", any(str, int))

513

))),

514

get("title", optional(str))

515

)

516

))

517

)

518

519

try:

520

data = validate(res.text, schema)

521

return self._create_streams(data["streams"])

522

except ValidationError as err:

523

raise PluginError(f"Failed to extract stream data: {err}")

524

```

525

526

### URL and Format Validation

527

528

```python

529

from streamlink.validate import validate, all, any, url, contains, startswith, endswith

530

531

# URL validation with format checking

532

m3u8_schema = all(str, url(), endswith('.m3u8'))

533

mpd_schema = all(str, url(), endswith('.mpd'))

534

stream_url_schema = any(m3u8_schema, mpd_schema)

535

536

# Validate different stream URLs

537

validate("https://example.com/stream.m3u8", stream_url_schema) # Success

538

validate("https://example.com/stream.mpd", stream_url_schema) # Success

539

540

# Custom URL validation

541

api_url_schema = all(

542

str,

543

url(),

544

contains('/api/'),

545

startswith('https://')

546

)

547

548

validate("https://api.example.com/api/streams", api_url_schema) # Success

549

```

550

551

### Advanced Schema Composition

552

553

```python

554

from streamlink.validate import *

555

556

# Create reusable schema components

557

quality_schema = any("240p", "360p", "480p", "720p", "1080p", "best", "worst")

558

559

stream_schema = all(dict, {

560

"url": all(str, url()),

561

"quality": quality_schema,

562

"format": optional(any("hls", "dash", "http")),

563

"bitrate": optional(any(int, float))

564

})

565

566

playlist_schema = all(dict, {

567

"title": optional(str),

568

"thumbnail": optional(all(str, url())),

569

"duration": optional(int),

570

"streams": list(stream_schema)

571

})

572

573

# Use composed schema

574

data = {

575

"title": "Example Stream",

576

"streams": [

577

{"url": "https://example.com/720p.m3u8", "quality": "720p", "format": "hls"},

578

{"url": "https://example.com/1080p.m3u8", "quality": "1080p", "format": "hls"}

579

]

580

}

581

582

validated_data = validate(data, playlist_schema)

583

```

584

585

### Error Handling in Validation

586

587

```python

588

from streamlink.validate import validate, ValidationError, all, length, parse_html, xml_findtext

589

590

def safe_validate(obj, schema, default=None):

591

"""Safely validate with fallback value"""

592

try:

593

return validate(obj, schema)

594

except ValidationError:

595

return default

596

597

# Use in plugin

598

def _extract_video_id(self, html):

599

video_id_schema = all(

600

parse_html(),

601

xml_findtext(".//meta[@property='video:id']/@content"),

602

str,

603

length(1)

604

)

605

606

return safe_validate(html, video_id_schema, "unknown")

607

```