cssselect parses CSS3 Selectors and translates them to XPath 1.0
npx @tessl/cli install tessl/pypi-cssselect@1.3.0

cssselect is a Python library that parses CSS3 selectors and translates them to XPath 1.0 expressions. It enables developers to use CSS selector syntax to find matching elements in XML or HTML documents through XPath engines like lxml. The library provides a clean API for converting CSS selectors into XPath expressions, making it easier to work with HTML/XML parsing and element selection in Python applications.
pip install cssselect

import cssselect

Common usage patterns:
from cssselect import GenericTranslator, HTMLTranslator, parse

For accessing all public API components:
from cssselect import (
ExpressionError,
FunctionalPseudoElement,
GenericTranslator,
HTMLTranslator,
Selector,
SelectorError,
SelectorSyntaxError,
parse,
)

from cssselect import GenericTranslator, HTMLTranslator
# Basic CSS to XPath translation
translator = GenericTranslator()
xpath = translator.css_to_xpath('div.content > p')
print(xpath) # "descendant-or-self::div[@class and contains(concat(' ', normalize-space(@class), ' '), ' content ')]/p"
# HTML-specific translation with pseudo-class support
html_translator = HTMLTranslator()
xpath = html_translator.css_to_xpath('input:checked')
print(xpath) # XPath expression for checked input elements
# Parse selectors for inspection
from cssselect import parse
selectors = parse('div.content, #main')
for selector in selectors:
    print(f"Selector: {selector.canonical()}")
    print(f"Specificity: {selector.specificity()}")

Parse CSS selector strings into structured Selector objects for analysis and manipulation.
def parse(css: str) -> list[Selector]:
"""
Parse a CSS group of selectors into Selector objects.
Parameters:
- css (str): A group of selectors as a string
Returns:
list[Selector]: List of parsed Selector objects
Raises:
SelectorSyntaxError: On invalid selectors
"""

Translate CSS selectors to XPath expressions for generic XML documents with case-sensitive matching.
class GenericTranslator:
"""
Translator for generic XML documents.
Everything is case-sensitive, no assumption is made on the meaning
of element names and attribute names.
"""
def __init__(self):
"""Initialize a GenericTranslator instance."""
def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
"""
Translate a group of selectors to XPath.
Parameters:
- css (str): A group of selectors as a string
- prefix (str): Prepended to XPath expression (default: "descendant-or-self::")
Returns:
str: The equivalent XPath 1.0 expression
Raises:
SelectorSyntaxError: On invalid selectors
ExpressionError: On unknown/unsupported selectors
"""
def selector_to_xpath(
self,
selector: Selector,
prefix: str = "descendant-or-self::",
translate_pseudo_elements: bool = False
) -> str:
"""
Translate a single parsed selector to XPath.
Parameters:
- selector (Selector): A parsed Selector object
- prefix (str): Prepended to XPath expression (default: "descendant-or-self::")
- translate_pseudo_elements (bool): Whether to handle pseudo-elements
Returns:
str: The equivalent XPath 1.0 expression
Raises:
ExpressionError: On unknown/unsupported selectors
"""
def xpath_pseudo_element(self, xpath, pseudo_element):
"""
Handle pseudo-element in XPath translation.
Parameters:
- xpath: XPath expression object
- pseudo_element (PseudoElement): Pseudo-element to handle
Returns:
XPath expression with pseudo-element handling
"""
@staticmethod
def xpath_literal(s: str) -> str:
"""
Create properly escaped XPath literal from string.
Parameters:
- s (str): String to escape
Returns:
str: XPath-escaped string literal
"""
# Configuration attributes
id_attribute: str = "id" # Attribute used for ID selectors
lang_attribute: str = "xml:lang" # Attribute used for :lang() pseudo-class
lower_case_element_names: bool = False # Case sensitivity for element names
lower_case_attribute_names: bool = False # Case sensitivity for attribute names
lower_case_attribute_values: bool = False # Case sensitivity for attribute values

Translate CSS selectors to XPath expressions optimized for HTML documents with HTML-specific pseudo-class support.
class HTMLTranslator(GenericTranslator):
"""
Translator for HTML documents.
Has useful implementations of HTML-specific pseudo-classes and
handles HTML case-insensitivity rules.
"""
def __init__(self, xhtml: bool = False):
"""
Initialize HTML translator.
Parameters:
- xhtml (bool): If False (default), element and attribute names are case-insensitive
"""
# Overridden configuration attributes
lang_attribute: str = "lang" # Uses 'lang' instead of 'xml:lang' for HTML

Work with parsed CSS selectors as structured objects for analysis and manipulation.
class Selector:
"""
Represents a parsed CSS selector.
"""
def __init__(self, tree: Tree, pseudo_element: PseudoElement | None = None):
"""
Create a Selector object.
Parameters:
- tree (Tree): The parsed selector tree
- pseudo_element (PseudoElement | None): Pseudo-element if present
"""
def canonical(self) -> str:
"""
Return a CSS representation for this selector.
Returns:
str: CSS selector string
"""
def specificity(self) -> tuple[int, int, int]:
"""
Return the CSS specificity of this selector.
Returns:
tuple[int, int, int]: Specificity as (a, b, c) tuple per CSS specification
"""
# Attributes
parsed_tree: Tree # The parsed selector tree
pseudo_element: PseudoElement | None # Pseudo-element if present

Handle functional pseudo-elements with arguments like ::name(arguments).
class FunctionalPseudoElement:
"""
Represents functional pseudo-elements like ::name(arguments).
"""
def __init__(self, name: str, arguments: Sequence[Token]):
"""
Create a functional pseudo-element.
Parameters:
- name (str): The pseudo-element name
- arguments (Sequence[Token]): The argument tokens
"""
def argument_types(self) -> list[str]:
"""
Get the types of the pseudo-element arguments.
Returns:
list[str]: List of argument token types
"""
def canonical(self) -> str:
"""
Return CSS representation of the functional pseudo-element.
Returns:
str: CSS pseudo-element string
"""
# Attributes
name: str # The pseudo-element name
arguments: Sequence[Token] # The argument tokens

class SelectorError(Exception):
"""
Base exception for CSS selector related errors.
Common parent for SelectorSyntaxError and ExpressionError.
Use except SelectorError: to catch both exception types.
"""
class SelectorSyntaxError(SelectorError, SyntaxError):
"""
Exception raised when parsing a selector that does not match the CSS grammar.
"""
class ExpressionError(SelectorError, RuntimeError):
"""
Exception raised for unknown or unsupported selector features during XPath translation.
"""

Basic error handling:
from cssselect import GenericTranslator, SelectorError
translator = GenericTranslator()
try:
    xpath = translator.css_to_xpath('div.content > p')
except SelectorError as e:
    print(f"Selector error: {e}")

Specific error handling:
from cssselect import parse, SelectorSyntaxError, ExpressionError
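try:
    selectors = parse('div.content > p')
    # Process selectors (e.g. translate them to XPath, which is the step
    # that can raise ExpressionError; parse() itself only raises
    # SelectorSyntaxError)...
except SelectorSyntaxError as e:
    print(f"Invalid CSS syntax: {e}")
except ExpressionError as e:
    print(f"Unsupported selector feature: {e}")

from cssselect import parse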
# Analyze selector specificity and structure
selectors = parse('div.content #main, body > nav a:hover')
for selector in selectors:
    print(f"Selector: {selector.canonical()}")
    print(f"Specificity: {selector.specificity()}")
    if selector.pseudo_element:
        print(f"Pseudo-element: {selector.pseudo_element}")

from cssselect import GenericTranslator
# Use custom prefix for XPath expression
translator = GenericTranslator()
xpath = translator.css_to_xpath('div > p', prefix="./")
print(xpath) # "./div/p"
# Translate single selector with pseudo-element handling
from cssselect import parse
selectors = parse('div::before')
xpath = translator.selector_to_xpath(
    selectors[0],
    prefix="descendant::",
    translate_pseudo_elements=True
)

from cssselect import GenericTranslator, HTMLTranslator
css = 'INPUT:checked'
# Generic (case-sensitive) translation
generic = GenericTranslator()
generic_xpath = generic.css_to_xpath(css)
# HTML (case-insensitive with HTML pseudo-classes) translation
html = HTMLTranslator()
html_xpath = html.css_to_xpath(css)
print(f"Generic: {generic_xpath}")
print(f"HTML: {html_xpath}")

Advanced users working with parsed selectors may encounter these tree node classes:
class Element:
"""Represents element selectors (tag, *, namespace|tag)."""
def canonical(self) -> str: ...
def specificity(self) -> tuple[int, int, int]: ...
class Class:
"""Represents class selectors (.classname)."""
def canonical(self) -> str: ...
def specificity(self) -> tuple[int, int, int]: ...
class Hash:
"""Represents ID selectors (#id)."""
def canonical(self) -> str: ...
def specificity(self) -> tuple[int, int, int]: ...
class Attrib:
"""Represents attribute selectors ([attr], [attr=val], etc.)."""
def canonical(self) -> str: ...
def specificity(self) -> tuple[int, int, int]: ...
class Pseudo:
"""Represents pseudo-class selectors (:hover, :first-child)."""
def canonical(self) -> str: ...
def specificity(self) -> tuple[int, int, int]: ...
class Function:
"""Represents functional pseudo-classes (:nth-child(2n+1))."""
def canonical(self) -> str: ...
def specificity(self) -> tuple[int, int, int]: ...
class Negation:
"""Represents :not() pseudo-class."""
def canonical(self) -> str: ...
def specificity(self) -> tuple[int, int, int]: ...
class Relation:
"""Represents :has() relational pseudo-class."""
def canonical(self) -> str: ...
def specificity(self) -> tuple[int, int, int]: ...
class Matching:
"""Represents :is() pseudo-class."""
def canonical(self) -> str: ...
def specificity(self) -> tuple[int, int, int]: ...
class SpecificityAdjustment:
"""Represents :where() pseudo-class."""
def canonical(self) -> str: ...
def specificity(self) -> tuple[int, int, int]: ...
class CombinedSelector:
"""Represents combined selectors with combinators ('>', '+', '~', ' ')."""
def canonical(self) -> str: ...
def specificity(self) -> tuple[int, int, int]: ...

# Type aliases for internal selector tree structure
Tree = Union[
Element, Hash, Class, Function, Pseudo, Attrib,
Negation, Relation, Matching, SpecificityAdjustment, CombinedSelector
]
PseudoElement = Union[FunctionalPseudoElement, str]

class Token(tuple[str, Optional[str]]):
"""
Represents a CSS token during parsing.
Token types include: IDENT, HASH, STRING, S (whitespace), DELIM, NUMBER, EOF
"""
def __new__(cls, type_: str, value: str | None, pos: int):
"""
Create a new token.
Parameters:
- type_ (str): Token type (IDENT, HASH, STRING, S, DELIM, NUMBER, EOF)
- value (str | None): Token value
- pos (int): Position in source string
"""
def is_delim(self, *values: str) -> bool:
"""
Check if token is delimiter with specific value(s).
Parameters:
- *values (str): Values to check against
Returns:
bool: True if token is delimiter with one of the specified values
"""
def css(self) -> str:
"""
Return CSS representation of the token.
Returns:
str: CSS string representation
"""
# Properties
type: str # Token type
value: str | None # Token value
pos: int # Position in source
class EOFToken(Token):
"""Special end-of-file token."""

Advanced parsing and string manipulation utilities:
def parse_series(tokens) -> tuple[int, int]:
"""
Parse :nth-child() style arguments like '2n+1'.
Parameters:
- tokens: Iterable of tokens representing the series expression
Returns:
tuple[int, int]: (a, b) values for an + b expression
"""
def ascii_lower(string: str) -> str:
"""
ASCII-only lowercase conversion.
Parameters:
- string (str): String to convert
Returns:
str: Lowercase string using ASCII rules only
"""
def unescape_ident(value: str) -> str:
"""
Unescape CSS identifier strings.
Parameters:
- value (str): CSS identifier with possible escape sequences
Returns:
str: Unescaped identifier string
"""

VERSION = "1.3.0"
__version__ = "1.3.0"