Command-line utility and Python library for extracting video streams from various streaming services
Streamlink provides a comprehensive schema-based validation system for parsing HTML, JSON, XML and validating data structures. The validation system uses combinators and type-specific validators to ensure data integrity during plugin development.
The main validation function that applies schemas to data objects.
def validate(obj, schema):
"""
Validate an object against a schema.
Parameters:
- obj: Object to validate
- schema: Validation schema (function, type, or validator)
Returns:
Validated and potentially transformed object
Raises:
ValidationError: If validation fails
"""
Schema = callable  # Schema type alias

Functions that combine multiple schemas for complex validation logic.
def all(*schemas):
"""
All schemas must pass validation.
Parameters:
- *schemas: Schemas to apply in sequence
Returns:
Result of the last schema
"""
def any(*schemas):
"""
At least one schema must pass validation.
Parameters:
- *schemas: Schemas to try in order
Returns:
Result of the first successful schema
"""
def none_or_all(*schemas):
"""
Either None or all schemas must pass.
Parameters:
- *schemas: Schemas to apply if object is not None
Returns:
None if input is None, otherwise result of all(*schemas)
"""
def optional(schema):
"""
Schema is optional - passes None through unchanged.
Parameters:
- schema: Schema to apply if object is not None
Returns:
None if input is None, otherwise result of schema
"""
def transform(func, *schemas):
"""
Transform object with function then apply schemas.
Parameters:
- func: Transformation function
- *schemas: Schemas to apply to transformed object
Returns:
Result of applying schemas to func(obj)
"""
def list(schema):
"""
Validate each element in a list.
Parameters:
- schema: Schema to apply to each list element
Returns:
List with each element validated by schema
"""
def union(*schemas):
"""
Union of schemas - first successful schema wins.
Parameters:
- *schemas: Schemas to try in order
Returns:
Result of first successful schema
"""
def union_get(*schemas):
"""
Union schemas with get operations.
Parameters:
- *schemas: Get schemas to try in order
Returns:
Result of first successful get schema
"""
def regex(pattern, **kwargs):
"""
Validate string against regular expression.
Parameters:
- pattern: Regular expression pattern
- **kwargs: Additional regex options
Returns:
Match groups or original string
"""
def xml_element(tag=None, **kwargs):
"""
Validate XML element structure.
Parameters:
- tag: Expected tag name (optional)
- **kwargs: Additional validation options
Returns:
Validated XML element
"""

Validators for extracting and validating data from nested structures.
def attr(attr, schema, default=None):
"""
Validate object attribute.
Parameters:
- attr: Attribute name to extract
- schema: Schema to apply to attribute value
- default: Default value if attribute missing
Returns:
Validated attribute value
"""
def get(item, schema, default=None):
"""
Validate dictionary/list item.
Parameters:
- item: Key/index to extract
- schema: Schema to apply to item value
- default: Default value if item missing
Returns:
Validated item value
"""

Validators for string content and format checking.
def contains(item):
"""
Check if string contains specified item.
Parameters:
- item: Substring to search for
Returns:
Original string if contains item
"""
def startswith(prefix):
"""
Check if string starts with prefix.
Parameters:
- prefix: Required string prefix
Returns:
Original string if starts with prefix
"""
def endswith(suffix):
"""
Check if string ends with suffix.
Parameters:
- suffix: Required string suffix
Returns:
Original string if ends with suffix
"""
def length(min_len, max_len=None):
"""
Validate string or collection length.
Parameters:
- min_len: Minimum required length
- max_len: Maximum allowed length (optional)
Returns:
Original object if length is valid
"""
def getattr(attr, default=None):
"""
Get object attribute for validation.
Parameters:
- attr: Attribute name to extract
- default: Default value if attribute missing
Returns:
Attribute value
"""
def hasattr(attr):
"""
Check if object has specified attribute.
Parameters:
- attr: Attribute name to check
Returns:
Original object if attribute exists
"""
def filter(func):
"""
Filter collection elements using function.
Parameters:
- func: Filter function (returns bool)
Returns:
Filtered collection
"""
def map(func):
"""
Apply function to each element in collection.
Parameters:
- func: Mapping function
Returns:
Collection with mapped elements
"""

Validators that parse and validate different data formats.
def url(**kwargs):
"""
Validate and parse URLs.
Parameters:
- **kwargs: Additional validation options
Returns:
Parsed and validated URL
"""
def parse_html(**kwargs):
"""
Parse HTML content using lxml.
Parameters:
- **kwargs: lxml.html parsing options
Returns:
Parsed HTML element tree
"""
def parse_json(**kwargs):
"""
Parse JSON data.
Parameters:
- **kwargs: json.loads() options
Returns:
Parsed JSON object
"""
def parse_xml(**kwargs):
"""
Parse XML content using lxml.
Parameters:
- **kwargs: lxml.etree parsing options
Returns:
Parsed XML element tree
"""

Specialized validators for XML content extraction.
def xml_find(tag):
"""
Find first XML element matching tag.
Parameters:
- tag: XPath expression or tag name
Returns:
First matching XML element
"""
def xml_findall(tag):
"""
Find all XML elements matching tag.
Parameters:
- tag: XPath expression or tag name
Returns:
List of matching XML elements
"""
def xml_findtext(tag):
"""
Extract text content from XML element.
Parameters:
- tag: XPath expression or tag name
Returns:
Text content of first matching element
"""
def xml_xpath(expression):
"""
Execute XPath expression on XML element.
Parameters:
- expression: XPath expression string
Returns:
XPath query results
"""
def xml_xpath_string(expression):
"""
Execute XPath expression and return string result.
Parameters:
- expression: XPath expression string
Returns:
String result of XPath query
"""
def parse_qsd(**kwargs):
"""
Parse query string data.
Parameters:
- **kwargs: Query string parsing options
Returns:
Parsed query string dictionary
"""

from streamlink.validate import validate, all, length, optional, startswith
# Plain Python types can be used directly as schemas.
validate("hello", str)  # Returns "hello"
validate(42, int)  # Returns 42

# Combine schemas: a string whose length is between 1 and 100.
length_bounds = length(1, 100)
schema = all(str, length_bounds)
validate("valid string", schema)  # Success

# Optional validation: None passes through unchanged, any other value must
# satisfy the wrapped schema.
http_string = all(str, startswith("http"))
optional_schema = optional(http_string)
validate(None, optional_schema)  # Returns None
validate("https://example.com", optional_schema)  # Success

from streamlink.validate import (
    validate, all, parse_html, url, xml_find, xml_findtext, xml_xpath_string,
)

html_content = """
<html>
<body>
<div class="video-container">
<video src="https://example.com/video.mp4" />
<div class="title">Video Title</div>
</div>
</body>
</html>
"""

# Parse HTML and extract video URL.
# "src" is an XML/HTML attribute of the <video> element, not a Python
# attribute of the element object, so it is read with an XPath attribute
# query (relative to the element found by xml_find) instead of attr().
schema = all(
    parse_html(),
    xml_find(".//video"),
    xml_xpath_string("@src"),
    str,
    url(),
)
video_url = validate(html_content, schema)
print(video_url)  # "https://example.com/video.mp4"

# Extract title text: xml_findtext() yields the text content of the first
# element matching the path.
title_schema = all(
    parse_html(),
    xml_findtext(".//div[@class='title']"),
    str,
)
title = validate(html_content, title_schema)
print(title)  # "Video Title"

from streamlink.validate import validate, all, get, list, parse_json, url

json_data = '''
{
"streams": [
{"quality": "720p", "url": "https://example.com/720p.m3u8"},
{"quality": "1080p", "url": "https://example.com/1080p.m3u8"}
],
"title": "Stream Title"
}
'''

# Validate stream data structure.
# Each entry is checked with a dict schema so the validated entry stays a
# dict. Chaining get() calls inside all() would replace the entry with the
# extracted value (all() feeds each result into the next schema), which
# would break the key lookups in the loop below.
stream_schema = all(
    parse_json(),
    get("streams", list(all(
        dict,
        {
            "quality": str,
            "url": all(str, url()),
        },
    ))),
)
streams = validate(json_data, stream_schema)
for stream in streams:
    print(f"{stream['quality']}: {stream['url']}")

from streamlink.validate import *
from streamlink.plugin import Plugin, PluginError


class ExamplePlugin(Plugin):
    """Example plugin that reads stream data from JSON embedded in a script tag."""

    def _extract_streams(self):
        """Fetch the page at ``self.url`` and build streams from its ``videoData``.

        Returns:
            The result of ``self._create_streams`` for the validated
            ``streams`` list.

        Raises:
            PluginError: If the page content does not match the schema.
        """
        # Fetch webpage
        res = self.session.http.get(self.url)

        # Validate and extract stream data
        schema = all(
            parse_html(),
            # contains() is an XPath function, which find()-style paths do
            # not support, so query with real XPath and read the script's
            # text node directly.
            xml_xpath_string(".//script[contains(text(), 'videoData')]/text()"),
            str,
            # Extract JSON from script tag
            transform(lambda x: x.split('videoData = ')[1].split(';')[0], str),
            parse_json(),
            # Validate JSON structure with dict schemas so the result keeps
            # its keys; chained get() calls inside all() would each replace
            # the object with the extracted value, leaving no "streams" key
            # to read below.
            all(dict, {
                "streams": list(all(dict, {
                    "format": str,
                    "url": all(str, url()),
                    "quality": any(str, int),
                })),
                "title": optional(str),
            }),
        )
        try:
            data = validate(res.text, schema)
            return self._create_streams(data["streams"])
        except ValidationError as err:
            # Chain the original error so the validation details stay visible
            # in the traceback.
            raise PluginError(f"Failed to extract stream data: {err}") from err


from streamlink.validate import (
    validate, all, any, contains, endswith, startswith, url,
)
# URL validation with format checking: each schema accepts a URL string with
# one specific file extension.
m3u8_schema = all(str, url(), endswith('.m3u8'))
mpd_schema = all(str, url(), endswith('.mpd'))
# Accept either playlist type.
stream_url_schema = any(m3u8_schema, mpd_schema)

# Validate different stream URLs
validate("https://example.com/stream.m3u8", stream_url_schema)  # Success
validate("https://example.com/stream.mpd", stream_url_schema)  # Success

# Custom URL validation: an HTTPS URL whose text contains "/api/".
api_url_schema = all(
    str,
    url(),
    contains('/api/'),
    startswith('https://'),
)
validate("https://api.example.com/api/streams", api_url_schema)  # Success

from streamlink.validate import *
# Reusable building blocks, combined below into larger schemas.
quality_schema = any("240p", "360p", "480p", "720p", "1080p", "best", "worst")

# A single stream entry: required URL and quality, optional format/bitrate.
stream_fields = {
    "url": all(str, url()),
    "quality": quality_schema,
    "format": optional(any("hls", "dash", "http")),
    "bitrate": optional(any(int, float)),
}
stream_schema = all(dict, stream_fields)

# A playlist: optional metadata plus a list of stream entries.
playlist_fields = {
    "title": optional(str),
    "thumbnail": optional(all(str, url())),
    "duration": optional(int),
    "streams": list(stream_schema),
}
playlist_schema = all(dict, playlist_fields)

# Sample input for the composed schema
data = {
    "title": "Example Stream",
    "streams": [
        {"url": "https://example.com/720p.m3u8", "quality": "720p", "format": "hls"},
        {"url": "https://example.com/1080p.m3u8", "quality": "1080p", "format": "hls"},
    ],
}
validated_data = validate(data, playlist_schema)from streamlink.validate import validate, ValidationError
def safe_validate(obj, schema, default=None):
"""Safely validate with fallback value"""
try:
return validate(obj, schema)
except ValidationError:
return default
# Use in plugin
def _extract_video_id(self, html):
video_id_schema = all(
parse_html(),
xml_findtext(".//meta[@property='video:id']/@content"),
str,
length(1)
)
return safe_validate(html, video_id_schema, "unknown")Install with Tessl CLI
npx tessl i tessl/pypi-streamlink