CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-streamlink

Command-line utility and Python library for extracting video streams from various streaming services

Overview
Eval results
Files

docs/data-validation.md

Data Validation

Streamlink provides a comprehensive schema-based validation system for parsing HTML, JSON, XML and validating data structures. The validation system uses combinators and type-specific validators to ensure data integrity during plugin development.

Capabilities

Core Validation Function

The main validation function that applies schemas to data objects.

def validate(obj, schema):
    """
    Validate an object against a schema and return the validated result.

    Parameters:
    - obj: Object to validate
    - schema: Validation schema (function, type, or validator)

    Returns:
    The validated object, which the schema may have transformed

    Raises:
    ValidationError: If validation fails

    NOTE(review): upstream streamlink appears to declare this function as
    validate(schema, value), i.e. with the schema first - confirm the
    argument order against the installed version.
    """

# NOTE(review): `callable` is the builtin predicate function, not a type, and
# the usage examples on this page also pass dicts and literal strings as
# schemas - a typing alias (e.g. typing.Any) may describe this better.
# Confirm against the library before changing.
Schema = callable  # Schema type alias

Schema Combinators

Functions that combine multiple schemas for complex validation logic.

def all(*schemas):
    """
    Require every schema to pass; schemas are applied in sequence.

    Note that importing this name shadows the builtin all().

    Parameters:
    - *schemas: Schemas to apply in sequence

    Returns:
    Result of the last schema
    """

def any(*schemas):
    """
    Require at least one schema to pass; schemas are tried in order.

    Note that importing this name shadows the builtin any().

    Parameters:
    - *schemas: Schemas to try in order

    Returns:
    Result of the first successful schema
    """

def none_or_all(*schemas):
    """
    Accept None as-is; otherwise require every schema to pass.

    Parameters:
    - *schemas: Schemas to apply if object is not None

    Returns:
    None if input is None, otherwise result of all(*schemas)
    """

def optional(schema):
    """
    Make a schema optional: None passes through unchanged.

    Parameters:
    - schema: Schema to apply if object is not None

    Returns:
    None if input is None, otherwise result of schema

    NOTE(review): upstream streamlink appears to use optional() to mark a
    dict *key* as optional (e.g. {optional("title"): str}) rather than to
    wrap a value schema - confirm against the installed version.
    """

def transform(func, *schemas):
    """
    Transform the object with a function, then apply schemas to the result.

    Parameters:
    - func: Transformation function
    - *schemas: Schemas to apply to transformed object

    Returns:
    Result of applying schemas to func(obj)

    NOTE(review): upstream streamlink appears to forward the extra
    positional/keyword arguments to func instead of treating them as
    schemas - confirm against the installed version.
    """

def list(schema):
    """
    Validate each element of a list against one schema.

    Note that importing this name shadows the builtin list type.

    Parameters:
    - schema: Schema to apply to each list element

    Returns:
    List with each element validated by schema

    NOTE(review): upstream streamlink appears to accept several schemas
    here, one per list position, and to use a plain [schema] literal for
    "every element" validation - confirm against the installed version.
    """

def union(*schemas):
    """
    Union of schemas - first successful schema wins.

    Parameters:
    - *schemas: Schemas to try in order

    Returns:
    Result of first successful schema

    NOTE(review): as described here this is indistinguishable from any().
    Upstream streamlink appears to apply *every* schema to the same input
    and collect all results into one container - confirm against the
    installed version.
    """

def union_get(*schemas):
    """
    Union of schemas built from get operations.

    Parameters:
    - *schemas: Get schemas to try in order

    Returns:
    Result of first successful get schema

    NOTE(review): upstream streamlink appears to take item keys here and
    to return a sequence holding every looked-up value - confirm against
    the installed version.
    """

def regex(pattern, **kwargs):
    """
    Validate a string against a regular expression.

    Parameters:
    - pattern: Regular expression pattern
    - **kwargs: Additional regex options

    Returns:
    Match groups or original string

    NOTE(review): upstream streamlink appears to expect a compiled pattern
    plus a `method` name (default "search") and to return that method's
    result - confirm against the installed version.
    """

def xml_element(tag=None, **kwargs):
    """
    Validate the structure of an XML element.

    Parameters:
    - tag: Expected tag name (optional)
    - **kwargs: Additional validation options

    Returns:
    Validated XML element

    NOTE(review): upstream streamlink appears to accept the keywords
    text, attrib and tail as the additional options - confirm against the
    installed version.
    """

Data Access Validators

Validators for extracting and validating data from nested structures.

def attr(attr, schema, default=None):
    """
    Extract an attribute from an object and validate its value.

    Parameters:
    - attr: Attribute name to extract
    - schema: Schema to apply to attribute value
    - default: Default value if attribute missing

    Returns:
    Validated attribute value

    NOTE(review): upstream streamlink appears to take a single dict that
    maps attribute names to schemas - confirm against the installed
    version.
    """

def get(item, schema, default=None):
    """
    Extract an item from a dictionary or list and validate its value.

    Parameters:
    - item: Key/index to extract
    - schema: Schema to apply to item value
    - default: Default value if item missing

    Returns:
    Validated item value

    NOTE(review): upstream streamlink appears to declare this as
    get(item, default=None, strict=False) with no schema parameter -
    confirm against the installed version.
    """

String Validators

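Validators for string content and format checking, plus general-purpose helpers for length checks, attribute access, and filtering or mapping collections.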

def contains(item):
    """
    Require that a string contains the given item.

    Parameters:
    - item: Substring to search for

    Returns:
    Original string if it contains item
    """

def startswith(prefix):
    """
    Require that a string starts with the given prefix.

    Parameters:
    - prefix: Required string prefix

    Returns:
    Original string if it starts with prefix
    """

def endswith(suffix):
    """
    Require that a string ends with the given suffix.

    Parameters:
    - suffix: Required string suffix

    Returns:
    Original string if it ends with suffix
    """

def length(min_len, max_len=None):
    """
    Validate the length of a string or collection.

    Parameters:
    - min_len: Minimum required length
    - max_len: Maximum allowed length (optional)

    Returns:
    Original object if length is valid

    NOTE(review): upstream streamlink appears to declare this as
    length(number, op="ge"), a single bound with a comparison operator -
    confirm against the installed version.
    """

def getattr(attr, default=None):
    """
    Read an attribute from the object being validated.

    Note that importing this name shadows the builtin getattr().

    Parameters:
    - attr: Attribute name to extract
    - default: Default value if attribute missing

    Returns:
    Attribute value
    """

def hasattr(attr):
    """
    Require that the object has the given attribute.

    Note that importing this name shadows the builtin hasattr().

    Parameters:
    - attr: Attribute name to check

    Returns:
    Original object if attribute exists
    """

def filter(func):
    """
    Filter the elements of a collection with a predicate function.

    Note that importing this name shadows the builtin filter().

    Parameters:
    - func: Filter function (returns bool)

    Returns:
    Filtered collection
    """

def map(func):
    """
    Apply a function to each element of a collection.

    Note that importing this name shadows the builtin map().

    Parameters:
    - func: Mapping function

    Returns:
    Collection with mapped elements
    """

Parsing Validators

Validators that parse and validate different data formats.

def url(**kwargs):
    """
    Validate and parse URLs.

    Parameters:
    - **kwargs: Additional validation options

    Returns:
    Parsed and validated URL

    NOTE(review): upstream streamlink appears to treat the keyword
    arguments as URL components to check (e.g. scheme="https") and to
    return the original URL string unchanged - confirm against the
    installed version.
    """

def parse_html(**kwargs):
    """
    Parse HTML content with lxml.

    Parameters:
    - **kwargs: lxml.html parsing options

    Returns:
    Parsed HTML element tree
    """

def parse_json(**kwargs):
    """
    Parse JSON data.

    Parameters:
    - **kwargs: json.loads() options

    Returns:
    Parsed JSON object
    """

def parse_xml(**kwargs):
    """
    Parse XML content with lxml.

    Parameters:
    - **kwargs: lxml.etree parsing options

    Returns:
    Parsed XML element tree
    """

XML-Specific Validators

Specialized validators for XML content extraction. The query-string parser `parse_qsd` is also listed at the end of this section.

def xml_find(tag):
    """
    Find the first XML element matching a tag or path.

    Parameters:
    - tag: XPath expression or tag name

    Returns:
    First matching XML element

    NOTE(review): find()-style lookups in lxml/ElementTree presumably
    support only the limited ElementPath syntax (no XPath functions such
    as contains(), no trailing /@attr step); use xml_xpath for full XPath -
    confirm against the installed version.
    """

def xml_findall(tag):
    """
    Find all XML elements matching a tag or path.

    Parameters:
    - tag: XPath expression or tag name

    Returns:
    List of matching XML elements

    NOTE(review): presumably limited to ElementPath syntax rather than
    full XPath; use xml_xpath for full XPath - confirm against the
    installed version.
    """

def xml_findtext(tag):
    """
    Extract the text content of the first XML element matching a tag or path.

    Parameters:
    - tag: XPath expression or tag name

    Returns:
    Text content of first matching element

    NOTE(review): presumably limited to ElementPath syntax, so attribute
    values cannot be selected with a trailing /@attr step; use
    xml_xpath_string for that - confirm against the installed version.
    """

def xml_xpath(expression):
    """
    Run an XPath expression against an XML element.

    Parameters:
    - expression: XPath expression string

    Returns:
    XPath query results
    """

def xml_xpath_string(expression):
    """
    Run an XPath expression against an XML element and return a string.

    Parameters:
    - expression: XPath expression string

    Returns:
    String result of XPath query
    """

def parse_qsd(**kwargs):
    """
    Parse query string data into a dictionary.

    Parameters:
    - **kwargs: Query string parsing options

    Returns:
    Parsed query string dictionary
    """

Usage Examples

Basic Validation

# NOTE: `all` imported here shadows the builtin all() for this snippet.
# length and startswith are used below, so they must be imported as well.
from streamlink.validate import all, length, optional, startswith, validate

# Simple type validation
validate("hello", str)  # Returns "hello"
validate(42, int)       # Returns 42

# Schema combinations: must be a str and have a length between 1 and 100
schema = all(str, length(1, 100))
validate("valid string", schema)  # Success

# Optional validation: None passes through, anything else must match
optional_schema = optional(all(str, startswith("http")))
validate(None, optional_schema)      # Returns None
validate("https://example.com", optional_schema)  # Success

HTML Parsing and Validation

# `all` must come from streamlink.validate: the builtin all() accepts a single
# iterable and would raise TypeError on the multi-argument calls below.
from streamlink.validate import (
    all,
    parse_html,
    url,
    validate,
    xml_findtext,
    xml_xpath_string,
)

html_content = """
<html>
    <body>
        <div class="video-container">
            <video src="https://example.com/video.mp4" />
            <div class="title">Video Title</div>
        </div>
    </body>
</html>
"""

# Parse HTML and extract video URL.
# "src" is an XML attribute of <video>, not a Python attribute of the parsed
# element object, so it is selected with an XPath attribute step.
schema = all(
    parse_html(),
    xml_xpath_string(".//video/@src"),
    str,
    url()
)

video_url = validate(html_content, schema)
print(video_url)  # "https://example.com/video.mp4"

# Extract title text
title_schema = all(
    parse_html(),
    xml_findtext(".//div[@class='title']"),
    str
)

title = validate(html_content, title_schema)
print(title)  # "Video Title"

JSON Data Validation

# `all` and `url` are used below and must be imported; `all` and `list`
# shadow the builtins of the same name for this snippet.
from streamlink.validate import all, get, list, parse_json, url, validate

json_data = '''
{
    "streams": [
        {"quality": "720p", "url": "https://example.com/720p.m3u8"},
        {"quality": "1080p", "url": "https://example.com/1080p.m3u8"}
    ],
    "title": "Stream Title"
}
'''

# Validate stream data structure.
# Each stream is validated with a dict schema so the result is still a dict.
# Chaining get("quality", ...) and get("url", ...) inside all() would hand on
# only the last extracted value, and stream['quality'] below would then fail.
stream_schema = all(
    parse_json(),
    get("streams", list(all(dict, {
        "quality": str,
        "url": all(str, url()),
    })))
)

streams = validate(json_data, stream_schema)
for stream in streams:
    print(f"{stream['quality']}: {stream['url']}")

Complex Plugin Validation

# Explicit imports instead of a star import: Plugin and PluginError are not
# part of streamlink.validate and were previously undefined in this snippet.
from streamlink.exceptions import PluginError
from streamlink.plugin import Plugin
from streamlink.validate import (
    ValidationError,
    all,
    any,
    list,
    optional,
    parse_html,
    parse_json,
    transform,
    url,
    validate,
    xml_xpath_string,
)


class ExamplePlugin(Plugin):
    """Example plugin that extracts stream data embedded in a <script> tag."""

    def _extract_streams(self):
        """
        Fetch the page, validate the embedded videoData JSON and build streams.

        Raises:
        PluginError: If the page content does not match the schema
        """
        # Fetch webpage
        res = self.session.http.get(self.url)

        # Validate and extract stream data
        schema = all(
            parse_html(),
            # The contains() predicate needs a full XPath query, so the script
            # text is selected with xml_xpath_string.
            xml_xpath_string(".//script[contains(text(), 'videoData')]/text()"),
            str,
            # Extract JSON from script tag
            transform(lambda x: x.split('videoData = ')[1].split(';')[0], str),
            parse_json(),
            # Validate JSON structure with dict schemas so the result stays a
            # dict and data["streams"] below is valid.
            all(dict, {
                "streams": list(all(dict, {
                    "format": str,
                    "url": all(str, url()),
                    "quality": any(str, int),
                })),
                "title": optional(str),
            })
        )

        try:
            data = validate(res.text, schema)
            return self._create_streams(data["streams"])
        except ValidationError as err:
            # Chain the original error so the validation context is preserved
            raise PluginError(f"Failed to extract stream data: {err}") from err

URL and Format Validation

# all, contains and startswith are used below and must be imported; `all` and
# `any` shadow the builtins of the same name for this snippet.
from streamlink.validate import (
    all,
    any,
    contains,
    endswith,
    startswith,
    url,
    validate,
)

# URL validation with format checking
m3u8_schema = all(str, url(), endswith('.m3u8'))
mpd_schema = all(str, url(), endswith('.mpd'))
stream_url_schema = any(m3u8_schema, mpd_schema)

# Validate different stream URLs
validate("https://example.com/stream.m3u8", stream_url_schema)  # Success
validate("https://example.com/stream.mpd", stream_url_schema)   # Success

# Custom URL validation
api_url_schema = all(
    str,
    url(),
    contains('/api/'),
    startswith('https://')
)

validate("https://api.example.com/api/streams", api_url_schema)  # Success

Advanced Schema Composition

# Explicit imports instead of a star import, so it is visible that `all`,
# `any` and `list` shadow the builtins of the same name in this snippet.
from streamlink.validate import all, any, list, optional, url, validate

# Create reusable schema components
quality_schema = any("240p", "360p", "480p", "720p", "1080p", "best", "worst")

stream_schema = all(dict, {
    "url": all(str, url()),
    "quality": quality_schema,
    "format": optional(any("hls", "dash", "http")),
    "bitrate": optional(any(int, float))
})

playlist_schema = all(dict, {
    "title": optional(str),
    "thumbnail": optional(all(str, url())),
    "duration": optional(int),
    "streams": list(stream_schema)
})

# Use composed schema.
# NOTE(review): the sample data omits the "thumbnail", "duration" and
# "bitrate" keys entirely - confirm that optional() tolerates absent keys and
# not only None values.
data = {
    "title": "Example Stream",
    "streams": [
        {"url": "https://example.com/720p.m3u8", "quality": "720p", "format": "hls"},
        {"url": "https://example.com/1080p.m3u8", "quality": "1080p", "format": "hls"}
    ]
}

validated_data = validate(data, playlist_schema)

Error Handling in Validation

# all, length, parse_html and the XML validator are used below and must be
# imported; `all` shadows the builtin all() for this snippet.
from streamlink.validate import (
    ValidationError,
    all,
    length,
    parse_html,
    validate,
    xml_xpath_string,
)


def safe_validate(obj, schema, default=None):
    """
    Validate obj against schema, returning a fallback instead of raising.

    Parameters:
    - obj: Object to validate
    - schema: Validation schema
    - default: Value returned when validation fails

    Returns:
    Validated object, or default if ValidationError was raised
    """
    try:
        return validate(obj, schema)
    except ValidationError:
        return default


# Use in plugin
def _extract_video_id(self, html):
    """Return the video id from the page's meta tag, or "unknown"."""
    video_id_schema = all(
        parse_html(),
        # The trailing /@content attribute step needs a full XPath query,
        # so xml_xpath_string is used instead of xml_findtext.
        xml_xpath_string(".//meta[@property='video:id']/@content"),
        str,
        length(1)
    )

    return safe_validate(html, video_id_schema, "unknown")

Install with Tessl CLI

npx tessl i tessl/pypi-streamlink

docs

data-validation.md

index.md

options-configuration.md

plugin-system.md

session-management.md

stream-access.md

utilities.md

tile.json