AI-powered browser automation library that enables language models to control web browsers for automated tasks
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Advanced DOM extraction, serialization, element indexing, and interaction capabilities for intelligent web page understanding. The DomService provides sophisticated DOM analysis and manipulation features that enable AI agents to understand and interact with web pages effectively.
Central service for DOM tree extraction and manipulation with support for cross-origin content and intelligent element indexing.
class DomService:
def __init__(
self,
browser_session: BrowserSession,
logger: logging.Logger = None,
cross_origin_iframes: bool = False,
include_attributes: list[str] = None
):
"""
Initialize DOM processing service.
Parameters:
- browser_session: Browser session for DOM access
- logger: Optional custom logger instance
- cross_origin_iframes: Include cross-origin iframe content
- include_attributes: DOM attributes to include in serialization
"""
async def get_dom_tree(self) -> DomTree:
"""
Extract complete DOM tree from current page.
Returns:
DomTree: Structured representation of page DOM
"""
async def get_clickable_elements(self) -> list[ElementInfo]:
"""
Extract all clickable/interactable elements from page.
Returns:
list[ElementInfo]: List of elements that can be interacted with
"""
async def serialize_dom(
self,
include_text: bool = True,
include_attributes: bool = True,
max_depth: int = None
) -> str:
"""
Serialize DOM tree to text representation.
Parameters:
- include_text: Include text content of elements
- include_attributes: Include element attributes
- max_depth: Maximum tree depth to serialize
Returns:
str: Text representation of DOM structure
"""
async def find_elements_by_text(self, text: str) -> list[ElementInfo]:
"""
Find elements containing specific text.
Parameters:
- text: Text to search for in elements
Returns:
list[ElementInfo]: Elements containing the text
"""
async def find_elements_by_selector(self, selector: str) -> list[ElementInfo]:
"""
Find elements using CSS selector.
Parameters:
- selector: CSS selector string
Returns:
list[ElementInfo]: Elements matching the selector
"""
async def get_element_screenshot(self, index: int) -> str:
"""
Take screenshot of specific element.
Parameters:
- index: Element index
Returns:
str: Path to element screenshot image
"""Hierarchical representation of web page DOM structure with element relationships and metadata.
class DomTree:
    """Complete DOM tree representation."""

    # Quoted: DomNode is declared after this class.
    root: "DomNode"
    total_elements: int
    clickable_elements: int
    form_elements: int
    interactive_elements: int
class DomNode:
    """Individual DOM node representation."""

    tag: str
    text: str
    attributes: dict[str, str]
    index: int
    # Quoted: a class cannot name itself inside its own body, and
    # BoundingBox is declared after this class.
    children: "list[DomNode]"
    parent: "DomNode"
    bounding_box: "BoundingBox"
    is_clickable: bool
    is_visible: bool
    xpath: str
    css_selector: str
class BoundingBox:
"""Element positioning and dimensions."""
x: float
y: float
width: float
height: float
top: float
left: float
bottom: float
right: floatDetailed information about individual DOM elements for interaction and analysis.
class ElementInfo:
    """Comprehensive element information."""

    index: int
    tag: str
    text: str
    attributes: dict[str, str]
    bounding_box: BoundingBox
    is_clickable: bool
    is_visible: bool
    is_enabled: bool
    element_type: str  # 'button', 'input', 'link', 'text', etc.
    xpath: str
    css_selector: str
    parent_index: int
    children_indices: list[int]
class FormElementInfo(ElementInfo):
    """Form-specific element information."""

    input_type: str  # 'text', 'password', 'email', 'checkbox', etc.
    is_required: bool
    placeholder: str
    value: str
    min_value: str
    max_value: str
    pattern: str
class SelectElementInfo(ElementInfo):
    """Select/dropdown element information."""

    # Quoted: SelectOption is declared after this class.
    options: "list[SelectOption]"
    selected_value: str
    multiple: bool
class SelectOption:
"""Option within select element."""
value: str
text: str
selected: bool
disabled: boolAdvanced analysis of element interactability and interaction patterns.
class InteractionAnalyzer:
    """Analyze element interaction possibilities."""

    # Return types are quoted because ClickabilityAnalysis, FormAnalysis and
    # InteractionStrategy are declared after this class.
    async def analyze_clickability(self, element: ElementInfo) -> "ClickabilityAnalysis":
        """
        Analyze how clickable an element is.

        Parameters:
        - element: Element to analyze

        Returns:
        ClickabilityAnalysis: Detailed clickability assessment
        """

    async def analyze_form_structure(self, form_index: int) -> "FormAnalysis":
        """
        Analyze form structure and required fields.

        Parameters:
        - form_index: Index of form element

        Returns:
        FormAnalysis: Complete form structure analysis
        """

    async def suggest_interaction_strategy(
        self,
        target_goal: str,
    ) -> "InteractionStrategy":
        """
        Suggest best interaction strategy for achieving goal.

        Parameters:
        - target_goal: Description of desired outcome

        Returns:
        InteractionStrategy: Recommended interaction sequence
        """
class ClickabilityAnalysis:
    """Analysis of element clickability."""

    is_clickable: bool
    confidence: float  # 0.0-1.0
    blocking_elements: list[ElementInfo]
    alternative_elements: list[ElementInfo]
    click_coordinates: tuple[float, float]
class FormAnalysis:
    """Complete form structure analysis."""

    form_element: ElementInfo
    required_fields: list[FormElementInfo]
    optional_fields: list[FormElementInfo]
    submit_buttons: list[ElementInfo]
    validation_rules: dict[str, str]
class InteractionStrategy:
    """Recommended interaction sequence."""

    # Quoted: InteractionStep is declared after this class.
    steps: "list[InteractionStep]"
    confidence: float
    alternatives: "list[InteractionStep]"
class InteractionStep:
"""Individual interaction step."""
action: str # 'click', 'input', 'scroll', 'wait'
element_index: int
parameters: dict[str, Any]
expected_outcome: strAdvanced content extraction capabilities for text, images, and structured data.
class ContentExtractor:
"""Extract various types of content from pages."""
async def extract_text_content(
self,
clean: bool = True,
include_hidden: bool = False
) -> str:
"""
Extract text content from page.
Parameters:
- clean: Clean and normalize text
- include_hidden: Include hidden element text
Returns:
str: Extracted text content
"""
async def extract_links(
self,
internal_only: bool = False,
include_anchors: bool = True
) -> list[LinkInfo]:
"""
Extract all links from page.
Parameters:
- internal_only: Only include internal links
- include_anchors: Include anchor links
Returns:
list[LinkInfo]: All links found on page
"""
async def extract_images(
self,
include_data_urls: bool = False,
min_size: tuple[int, int] = None
) -> list[ImageInfo]:
"""
Extract image information from page.
Parameters:
- include_data_urls: Include base64 data URLs
- min_size: Minimum image dimensions (width, height)
Returns:
list[ImageInfo]: All images found on page
"""
async def extract_tables(self) -> list[TableInfo]:
"""
Extract structured table data.
Returns:
list[TableInfo]: All tables with structured data
"""
class LinkInfo:
    """Link element information."""

    url: str
    text: str
    title: str
    element_index: int
    is_external: bool
    is_anchor: bool
class ImageInfo:
    """Image element information."""

    src: str
    alt: str
    title: str
    width: int
    height: int
    element_index: int
    is_data_url: bool
class TableInfo:
    """Table structure and data."""

    headers: list[str]
    rows: list[list[str]]
    element_index: int
    caption: str

from browser_use import BrowserSession, DomService
session = BrowserSession()
dom_service = DomService(session)
# Navigate to page
await session.navigate_to_url("https://example.com")
# Get complete DOM tree
dom_tree = await dom_service.get_dom_tree()
print(f"Total elements: {dom_tree.total_elements}")
print(f"Clickable elements: {dom_tree.clickable_elements}")
# Get clickable elements
clickable = await dom_service.get_clickable_elements()
for element in clickable:
print(f"Index {element.index}: {element.tag} - {element.text}")from browser_use import DomService, BrowserSession
session = BrowserSession()
dom_service = DomService(session)
await session.navigate_to_url("https://example.com/search")
# Find search box by text
search_elements = await dom_service.find_elements_by_text("Search")
if search_elements:
search_box = search_elements[0]
print(f"Found search box at index: {search_box.index}")
# Find elements by CSS selector
buttons = await dom_service.find_elements_by_selector("button.primary")
for button in buttons:
print(f"Button {button.index}: {button.text}")
# Take screenshot of specific element
if buttons:
screenshot_path = await dom_service.get_element_screenshot(buttons[0].index)
print(f"Button screenshot saved: {screenshot_path}")from browser_use import DomService, BrowserSession
session = BrowserSession()
# Configure DOM service with custom attributes
dom_service = DomService(
browser_session=session,
cross_origin_iframes=True, # Include iframe content
include_attributes=[
'id', 'class', 'name', 'data-testid',
'aria-label', 'placeholder', 'href', 'src'
]
)
await session.navigate_to_url("https://complex-site.com")
# Serialize DOM with custom options
dom_text = await dom_service.serialize_dom(
include_text=True,
include_attributes=True,
max_depth=5 # Limit depth for large pages
)
print("DOM Structure:")
print(dom_text[:1000]) # First 1000 charactersfrom browser_use import DomService, BrowserSession
session = BrowserSession()
dom_service = DomService(session)
await session.navigate_to_url("https://example.com/contact")
# Find all form elements
forms = await dom_service.find_elements_by_selector("form")
for form in forms:
print(f"Form {form.index}:")
# Analyze form structure
analyzer = InteractionAnalyzer()
form_analysis = await analyzer.analyze_form_structure(form.index)
print(f" Required fields: {len(form_analysis.required_fields)}")
for field in form_analysis.required_fields:
print(f" {field.tag}[{field.input_type}]: {field.placeholder}")
print(f" Submit buttons: {len(form_analysis.submit_buttons)}")
for button in form_analysis.submit_buttons:
print(f" {button.text}")from browser_use import DomService, BrowserSession, ContentExtractor
session = BrowserSession()
dom_service = DomService(session)
extractor = ContentExtractor()
await session.navigate_to_url("https://news-site.com/article")
# Extract page text content
text_content = await extractor.extract_text_content(clean=True)
print(f"Article text ({len(text_content)} chars):")
print(text_content[:500])
# Extract all links
links = await extractor.extract_links(internal_only=False)
print(f"\nFound {len(links)} links:")
for link in links[:5]: # First 5 links
print(f" {link.text}: {link.url}")
# Extract images
images = await extractor.extract_images(min_size=(100, 100))
print(f"\nFound {len(images)} images:")
for image in images[:3]: # First 3 images
print(f" {image.alt}: {image.src}")
# Extract tables if any
tables = await extractor.extract_tables()
if tables:
print(f"\nFound {len(tables)} tables:")
for i, table in enumerate(tables):
print(f" Table {i}: {len(table.headers)} columns, {len(table.rows)} rows")from browser_use import DomService, BrowserSession, InteractionAnalyzer
session = BrowserSession()
dom_service = DomService(session)
analyzer = InteractionAnalyzer()
await session.navigate_to_url("https://ecommerce-site.com/product")
# Plan interaction strategy for adding item to cart
strategy = await analyzer.suggest_interaction_strategy(
"Add this product to shopping cart"
)
print(f"Interaction strategy (confidence: {strategy.confidence}):")
for i, step in enumerate(strategy.steps):
print(f" Step {i+1}: {step.action} on element {step.element_index}")
print(f" Expected: {step.expected_outcome}")
# Execute strategy
for step in strategy.steps:
if step.action == "click":
await session.click_element(step.element_index)
elif step.action == "input":
text = step.parameters.get("text", "")
await session.input_text(step.element_index, text)
elif step.action == "scroll":
await session.scroll(step.parameters.get("down", True), 1)from browser_use import DomService, BrowserSession, InteractionAnalyzer
session = BrowserSession()
dom_service = DomService(session)
analyzer = InteractionAnalyzer()
await session.navigate_to_url("https://complex-ui.com")
# Find potential target element
target_elements = await dom_service.find_elements_by_text("Subscribe")
for element in target_elements:
# Analyze clickability
analysis = await analyzer.analyze_clickability(element)
print(f"Element {element.index} clickability:")
print(f" Clickable: {analysis.is_clickable}")
print(f" Confidence: {analysis.confidence}")
if analysis.blocking_elements:
print(f" Blocked by {len(analysis.blocking_elements)} elements")
if analysis.alternative_elements:
print(f" {len(analysis.alternative_elements)} alternatives available")
if analysis.is_clickable:
x, y = analysis.click_coordinates
print(f" Best click point: ({x}, {y})")# Default DOM attributes to include in serialization
DEFAULT_INCLUDE_ATTRIBUTES: list[str] = [
    'id',
    'class',
    'name',
    'aria-label',
    'data-testid',
    'placeholder',
    'href',
    'src',
    'type',
    'value',
]
# Interaction priority per element selector; the values are the
# priorities listed in the original table (10 down to 5).
CLICKABLE_ELEMENT_PRIORITIES: dict[str, int] = {
    'button': 10,
    'a': 9,
    'input[type="submit"]': 8,
    'input[type="button"]': 7,
    'select': 6,
    'input': 5,
}
# Upper limits for DOM serialization
MAX_DOM_TEXT_LENGTH: int = 50_000
MAX_ELEMENT_DEPTH: int = 20
MAX_CLICKABLE_ELEMENTS: int = 100

Install with Tessl CLI
npx tessl i tessl/pypi-browser-use