CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-browser-use

AI-powered browser automation library that enables language models to control web browsers for automated tasks

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/dom-processing.md

DOM Processing

Advanced DOM extraction, serialization, element indexing, and interaction capabilities for intelligent web page understanding. The DomService provides sophisticated DOM analysis and manipulation features that enable AI agents to understand and interact with web pages effectively.

Capabilities

DOM Service Core

Central service for DOM tree extraction and manipulation with support for cross-origin content and intelligent element indexing.

class DomService:
    """
    Service for DOM tree extraction, serialization and element lookup.

    Signature reference only: every method below is shown with its
    docstring and no implementation. Elements are addressed by the integer
    ``index`` carried on ElementInfo (see get_element_screenshot).
    """

    def __init__(
        self,
        browser_session: BrowserSession,
        logger: logging.Logger | None = None,
        cross_origin_iframes: bool = False,
        include_attributes: list[str] | None = None
    ) -> None:
        """
        Initialize DOM processing service.

        Parameters:
        - browser_session: Browser session for DOM access
        - logger: Optional custom logger instance (default: None)
        - cross_origin_iframes: Include cross-origin iframe content
          (default: False)
        - include_attributes: Names of DOM attributes to include in
          serialization (default: None; presumably falls back to
          DEFAULT_INCLUDE_ATTRIBUTES — TODO confirm)
        """

    async def get_dom_tree(self) -> DomTree:
        """
        Extract complete DOM tree from current page.

        Returns:
        DomTree: Structured representation of page DOM
        """

    async def get_clickable_elements(self) -> list[ElementInfo]:
        """
        Extract all clickable/interactable elements from page.

        Returns:
        list[ElementInfo]: List of elements that can be interacted with
        """

    async def serialize_dom(
        self,
        include_text: bool = True,
        include_attributes: bool = True,
        max_depth: int | None = None
    ) -> str:
        """
        Serialize DOM tree to text representation.

        Note that ``include_attributes`` here is an on/off flag, whereas the
        constructor parameter of the same name is a list of attribute names.

        Parameters:
        - include_text: Include text content of elements (default: True)
        - include_attributes: Include element attributes (default: True)
        - max_depth: Maximum tree depth to serialize (default: None;
          presumably meaning no explicit limit — TODO confirm)

        Returns:
        str: Text representation of DOM structure
        """

    async def find_elements_by_text(self, text: str) -> list[ElementInfo]:
        """
        Find elements containing specific text.

        Parameters:
        - text: Text to search for in elements

        Returns:
        list[ElementInfo]: Elements containing the text
        """

    async def find_elements_by_selector(self, selector: str) -> list[ElementInfo]:
        """
        Find elements using CSS selector.

        Parameters:
        - selector: CSS selector string

        Returns:
        list[ElementInfo]: Elements matching the selector
        """

    async def get_element_screenshot(self, index: int) -> str:
        """
        Take screenshot of specific element.

        Parameters:
        - index: Element index (the ``index`` field of an ElementInfo)

        Returns:
        str: Path to element screenshot image
        """

DOM Tree Structure

Hierarchical representation of web page DOM structure with element relationships and metadata.

class DomTree:
    """
    Complete DOM tree representation.

    Return type of DomService.get_dom_tree(): the root node plus integer
    counters for several element categories.
    """
    root: DomNode  # entry point into the node hierarchy
    total_elements: int
    clickable_elements: int  # a count, not the elements themselves
    form_elements: int
    # NOTE(review): how this differs from clickable_elements is not stated — confirm
    interactive_elements: int

class DomNode:
    """
    Individual DOM node representation.

    Nodes link to each other through ``children`` and ``parent``; those two
    annotations are quoted because the class refers to itself.
    """
    tag: str
    text: str
    attributes: dict[str, str]  # attribute name -> attribute value
    index: int
    children: list["DomNode"]
    parent: "DomNode"  # presumably unset/None on the root node — TODO confirm
    bounding_box: BoundingBox
    is_clickable: bool
    is_visible: bool
    xpath: str
    css_selector: str

class BoundingBox:
    """
    Element positioning and dimensions.

    Carries both an origin/size description (x, y, width, height) and the
    four edges (top, left, bottom, right).
    NOTE(review): units and coordinate origin (viewport vs. page) are not
    stated here — confirm before relying on them.
    """
    x: float
    y: float
    width: float
    height: float
    top: float
    left: float
    bottom: float
    right: float

Element Information

Detailed information about individual DOM elements for interaction and analysis.

class ElementInfo:
    """
    Comprehensive element information.

    Element type returned by the DomService lookup methods
    (get_clickable_elements, find_elements_by_text,
    find_elements_by_selector).
    """
    index: int  # value accepted by DomService.get_element_screenshot()
    tag: str
    text: str
    attributes: dict[str, str]  # attribute name -> attribute value
    bounding_box: BoundingBox
    is_clickable: bool
    is_visible: bool
    is_enabled: bool
    element_type: str  # 'button', 'input', 'link', 'text', etc.
    xpath: str
    css_selector: str
    parent_index: int  # presumably the `index` of the parent element — TODO confirm
    children_indices: list[int]  # presumably `index` values of child elements — TODO confirm

class FormElementInfo(ElementInfo):
    """
    Form-specific element information.

    Extends ElementInfo with input-related fields. Every added field except
    ``is_required`` is typed as ``str``, including the min/max bounds.
    """
    input_type: str  # 'text', 'password', 'email', 'checkbox', etc.
    is_required: bool
    placeholder: str
    value: str
    min_value: str  # kept as text, not parsed to a number
    max_value: str  # kept as text, not parsed to a number
    pattern: str  # presumably the HTML `pattern` validation regex — TODO confirm

class SelectElementInfo(ElementInfo):
    """
    Select/dropdown element information.

    Extends ElementInfo with the option list and selection state.
    """
    options: list[SelectOption]
    # NOTE(review): a single string even when `multiple` is True; which value
    # is reported for multi-selects is not stated — confirm
    selected_value: str
    multiple: bool

class SelectOption:
    """
    Option within select element.

    Item type of SelectElementInfo.options.
    """
    value: str
    text: str
    selected: bool
    disabled: bool

Element Interaction Analysis

Advanced analysis of element interactability and interaction patterns.

class InteractionAnalyzer:
    """
    Analyze element interaction possibilities.

    Signature reference only: methods are shown with docstrings and no
    implementation. No constructor is declared here.
    NOTE(review): how the analyzer obtains the current page/session is not
    shown — confirm.
    """
    
    async def analyze_clickability(self, element: ElementInfo) -> ClickabilityAnalysis:
        """
        Analyze how clickable an element is.

        Parameters:
        - element: Element to analyze

        Returns:
        ClickabilityAnalysis: Detailed clickability assessment
        """

    async def analyze_form_structure(self, form_index: int) -> FormAnalysis:
        """
        Analyze form structure and required fields.

        Parameters:
        - form_index: Index of form element (the ``index`` field of the
          form's ElementInfo)

        Returns:
        FormAnalysis: Complete form structure analysis
        """

    async def suggest_interaction_strategy(
        self,
        target_goal: str
    ) -> InteractionStrategy:
        """
        Suggest best interaction strategy for achieving goal.

        Parameters:
        - target_goal: Description of desired outcome, as free text

        Returns:
        InteractionStrategy: Recommended interaction sequence
        """

class ClickabilityAnalysis:
    """
    Analysis of element clickability.

    Return type of InteractionAnalyzer.analyze_clickability().
    """
    is_clickable: bool
    confidence: float  # 0.0-1.0
    blocking_elements: list[ElementInfo]
    alternative_elements: list[ElementInfo]
    # Unpacked as (x, y); NOTE(review): contents when is_clickable is False
    # are not stated — confirm
    click_coordinates: tuple[float, float]

class FormAnalysis:
    """
    Complete form structure analysis.

    Return type of InteractionAnalyzer.analyze_form_structure(). Fields are
    split into required and optional lists.
    """
    form_element: ElementInfo
    required_fields: list[FormElementInfo]
    optional_fields: list[FormElementInfo]
    submit_buttons: list[ElementInfo]
    # NOTE(review): meaning of keys and values is not stated (presumably
    # field identifier -> rule text) — confirm
    validation_rules: dict[str, str]

class InteractionStrategy:
    """
    Recommended interaction sequence.

    Return type of InteractionAnalyzer.suggest_interaction_strategy().
    """
    steps: list[InteractionStep]  # the recommended sequence
    confidence: float  # range not stated here; presumably 0.0-1.0 — TODO confirm
    # NOTE(review): a flat list of steps, not a list of sequences — confirm
    # how alternatives map onto `steps`
    alternatives: list[InteractionStep]

class InteractionStep:
    """
    Individual interaction step.

    Item type of InteractionStrategy.steps and .alternatives.
    """
    action: str  # 'click', 'input', 'scroll', 'wait'
    element_index: int
    # Action-specific arguments; the usage example reads "text" for 'input'
    # and "down" for 'scroll'. Other keys are not documented here.
    parameters: dict[str, Any]
    expected_outcome: str

Content Extraction

Advanced content extraction capabilities for text, images, and structured data.

class ContentExtractor:
    """
    Extract various types of content from pages.

    Signature reference only: methods are shown with docstrings and no
    implementation. No constructor is declared here.
    NOTE(review): how the extractor is bound to a page/session is not
    shown — confirm.
    """
    
    async def extract_text_content(
        self,
        clean: bool = True,
        include_hidden: bool = False
    ) -> str:
        """
        Extract text content from page.

        Parameters:
        - clean: Clean and normalize text (default: True)
        - include_hidden: Include hidden element text (default: False)

        Returns:
        str: Extracted text content
        """

    async def extract_links(
        self,
        internal_only: bool = False,
        include_anchors: bool = True
    ) -> list[LinkInfo]:
        """
        Extract all links from page.

        Parameters:
        - internal_only: Only include internal links (default: False)
        - include_anchors: Include anchor links (default: True)

        Returns:
        list[LinkInfo]: All links found on page
        """

    async def extract_images(
        self,
        include_data_urls: bool = False,
        min_size: tuple[int, int] | None = None
    ) -> list[ImageInfo]:
        """
        Extract image information from page.

        Parameters:
        - include_data_urls: Include base64 data URLs (default: False)
        - min_size: Minimum image dimensions (width, height)
          (default: None; presumably meaning no size filter — TODO confirm)

        Returns:
        list[ImageInfo]: All images found on page
        """

    async def extract_tables(self) -> list[TableInfo]:
        """
        Extract structured table data.

        Returns:
        list[TableInfo]: All tables with structured data
        """

class LinkInfo:
    """
    Link element information.

    Item type returned by ContentExtractor.extract_links().
    """
    url: str
    text: str
    title: str
    element_index: int
    is_external: bool  # counterpart of extract_links(internal_only=...)
    is_anchor: bool  # counterpart of extract_links(include_anchors=...)

class ImageInfo:
    """
    Image element information.

    Item type returned by ContentExtractor.extract_images().
    """
    src: str
    alt: str
    title: str
    # NOTE(review): rendered vs. intrinsic size is not stated — confirm
    width: int
    height: int
    element_index: int
    is_data_url: bool  # counterpart of extract_images(include_data_urls=...)

class TableInfo:
    """
    Table structure and data.

    Item type returned by ContentExtractor.extract_tables(). Cell values are
    plain strings.
    """
    headers: list[str]  # one entry per column
    rows: list[list[str]]  # one inner list per row
    element_index: int
    caption: str

Usage Examples

Basic DOM Analysis

# NOTE: this snippet uses top-level `await`; run it inside an `async def`
# (e.g. via asyncio.run) or in an async-aware REPL/notebook.
from browser_use import BrowserSession, DomService

session = BrowserSession()
dom_service = DomService(session)  # positional argument is `browser_session`

# Navigate to page
await session.navigate_to_url("https://example.com")

# Get complete DOM tree
dom_tree = await dom_service.get_dom_tree()
# Both fields are integer counters on DomTree
print(f"Total elements: {dom_tree.total_elements}")
print(f"Clickable elements: {dom_tree.clickable_elements}")

# Get clickable elements
clickable = await dom_service.get_clickable_elements()
for element in clickable:
    print(f"Index {element.index}: {element.tag} - {element.text}")

Element Search and Interaction

# NOTE: this snippet uses top-level `await`; run it inside an `async def`
# (e.g. via asyncio.run) or in an async-aware REPL/notebook.
from browser_use import DomService, BrowserSession

session = BrowserSession()
dom_service = DomService(session)  # positional argument is `browser_session`

await session.navigate_to_url("https://example.com/search")

# Find search box by text
search_elements = await dom_service.find_elements_by_text("Search")
# Guard against an empty result before taking the first match
if search_elements:
    search_box = search_elements[0]
    print(f"Found search box at index: {search_box.index}")

# Find elements by CSS selector
buttons = await dom_service.find_elements_by_selector("button.primary")
for button in buttons:
    print(f"Button {button.index}: {button.text}")

# Take screenshot of specific element
# (only attempted when the selector matched at least one button)
if buttons:
    screenshot_path = await dom_service.get_element_screenshot(buttons[0].index)
    print(f"Button screenshot saved: {screenshot_path}")

Advanced DOM Configuration

# NOTE: this snippet uses top-level `await`; run it inside an `async def`
# (e.g. via asyncio.run) or in an async-aware REPL/notebook.
from browser_use import DomService, BrowserSession

session = BrowserSession()

# Configure DOM service with custom attributes
# (here `include_attributes` is a list of attribute names)
dom_service = DomService(
    browser_session=session,
    cross_origin_iframes=True,  # Include iframe content
    include_attributes=[
        'id', 'class', 'name', 'data-testid', 
        'aria-label', 'placeholder', 'href', 'src'
    ]
)

await session.navigate_to_url("https://complex-site.com")

# Serialize DOM with custom options
# (here `include_attributes` is a boolean toggle, unlike the constructor's list)
dom_text = await dom_service.serialize_dom(
    include_text=True,
    include_attributes=True,
    max_depth=5  # Limit depth for large pages
)

print("DOM Structure:")
print(dom_text[:1000])  # First 1000 characters

Form Analysis Workflow

from browser_use import DomService, BrowserSession

session = BrowserSession()
dom_service = DomService(session)

await session.navigate_to_url("https://example.com/contact")

# Find all form elements
forms = await dom_service.find_elements_by_selector("form")

for form in forms:
    print(f"Form {form.index}:")
    
    # Analyze form structure
    analyzer = InteractionAnalyzer()
    form_analysis = await analyzer.analyze_form_structure(form.index)
    
    print(f"  Required fields: {len(form_analysis.required_fields)}")
    for field in form_analysis.required_fields:
        print(f"    {field.tag}[{field.input_type}]: {field.placeholder}")
    
    print(f"  Submit buttons: {len(form_analysis.submit_buttons)}")
    for button in form_analysis.submit_buttons:
        print(f"    {button.text}")

Content Extraction Workflow

# NOTE: this snippet uses top-level `await`; run it inside an `async def`
# (e.g. via asyncio.run) or in an async-aware REPL/notebook.
from browser_use import DomService, BrowserSession, ContentExtractor

session = BrowserSession()
# NOTE(review): dom_service is constructed but never used in this snippet
dom_service = DomService(session)
# NOTE(review): the extractor is created without a session argument; how it
# is tied to the page loaded below is not shown — confirm
extractor = ContentExtractor()

await session.navigate_to_url("https://news-site.com/article")

# Extract page text content
text_content = await extractor.extract_text_content(clean=True)
print(f"Article text ({len(text_content)} chars):")
print(text_content[:500])

# Extract all links
links = await extractor.extract_links(internal_only=False)
print(f"\nFound {len(links)} links:")
for link in links[:5]:  # First 5 links
    print(f"  {link.text}: {link.url}")

# Extract images
# min_size is a (width, height) pair
images = await extractor.extract_images(min_size=(100, 100))
print(f"\nFound {len(images)} images:")
for image in images[:3]:  # First 3 images
    print(f"  {image.alt}: {image.src}")

# Extract tables if any
tables = await extractor.extract_tables()
if tables:
    print(f"\nFound {len(tables)} tables:")
    for i, table in enumerate(tables):
        # The header count is reported as the column count
        print(f"  Table {i}: {len(table.headers)} columns, {len(table.rows)} rows")

Interaction Strategy Planning

# NOTE: this snippet uses top-level `await`; run it inside an `async def`
# (e.g. via asyncio.run) or in an async-aware REPL/notebook.
from browser_use import DomService, BrowserSession, InteractionAnalyzer

session = BrowserSession()
# NOTE(review): dom_service is constructed but never used in this snippet
dom_service = DomService(session)
analyzer = InteractionAnalyzer()

await session.navigate_to_url("https://ecommerce-site.com/product")

# Plan interaction strategy for adding item to cart
strategy = await analyzer.suggest_interaction_strategy(
    "Add this product to shopping cart"
)

print(f"Interaction strategy (confidence: {strategy.confidence}):")
for i, step in enumerate(strategy.steps):
    # i is zero-based, so steps are displayed starting from 1
    print(f"  Step {i+1}: {step.action} on element {step.element_index}")
    print(f"    Expected: {step.expected_outcome}")

# Execute strategy
# NOTE(review): only 'click', 'input' and 'scroll' are dispatched; a step whose
# action is 'wait' (or anything else) matches no branch and is silently skipped
for step in strategy.steps:
    if step.action == "click":
        await session.click_element(step.element_index)
    elif step.action == "input":
        # A missing "text" parameter falls back to an empty string
        text = step.parameters.get("text", "")
        await session.input_text(step.element_index, text)
    elif step.action == "scroll":
        # A missing "down" parameter falls back to True
        await session.scroll(step.parameters.get("down", True), 1)

Element Clickability Analysis

# NOTE: this snippet uses top-level `await`; run it inside an `async def`
# (e.g. via asyncio.run) or in an async-aware REPL/notebook.
from browser_use import DomService, BrowserSession, InteractionAnalyzer

session = BrowserSession()
dom_service = DomService(session)  # positional argument is `browser_session`
analyzer = InteractionAnalyzer()

await session.navigate_to_url("https://complex-ui.com")

# Find potential target element
target_elements = await dom_service.find_elements_by_text("Subscribe")

for element in target_elements:
    # Analyze clickability
    analysis = await analyzer.analyze_clickability(element)
    
    print(f"Element {element.index} clickability:")
    print(f"  Clickable: {analysis.is_clickable}")
    print(f"  Confidence: {analysis.confidence}")
    
    # Blocker and alternative lists are reported only when non-empty
    if analysis.blocking_elements:
        print(f"  Blocked by {len(analysis.blocking_elements)} elements")
    
    if analysis.alternative_elements:
        print(f"  {len(analysis.alternative_elements)} alternatives available")
    
    # Coordinates are read only for elements reported as clickable
    if analysis.is_clickable:
        x, y = analysis.click_coordinates
        print(f"  Best click point: ({x}, {y})")

Configuration Constants

# Attribute names included in DOM serialization by default
DEFAULT_INCLUDE_ATTRIBUTES: list[str] = [
    'id',
    'class',
    'name',
    'aria-label',
    'data-testid',
    'placeholder',
    'href',
    'src',
    'type',
    'value',
]

# Interaction priority per clickable element kind; keys are tag names or
# attribute selectors (presumably a higher number wins — TODO confirm)
CLICKABLE_ELEMENT_PRIORITIES: dict[str, int] = dict(
    [
        ('button', 10),
        ('a', 9),
        ('input[type="submit"]', 8),
        ('input[type="button"]', 7),
        ('select', 6),
        ('input', 5),
    ]
)

# Upper bounds applied to DOM serialization
MAX_DOM_TEXT_LENGTH: int = 50_000
MAX_ELEMENT_DEPTH: int = 20
MAX_CLICKABLE_ELEMENTS: int = 100

Install with Tessl CLI

npx tessl i tessl/pypi-browser-use

docs

agent-orchestration.md

browser-actions.md

browser-session.md

dom-processing.md

index.md

llm-integration.md

task-results.md

tile.json