AI-powered browser automation library that enables language models to control web browsers for automated tasks
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Browser session creation, configuration, and control for CDP-based browser automation. The BrowserSession class manages browser lifecycle and provides state access capabilities. Browser actions like navigation, clicking, and form interaction are handled through the Tools/Actions system (see Browser Actions).
Core browser session management with CDP protocol integration for direct browser control.
class BrowserSession:
async def get_browser_state_summary(
self,
cache_clickable_elements_hashes: bool = True,
include_screenshot: bool = True,
cached: bool = False,
include_recent_events: bool = False
) -> BrowserStateSummary:
"""
Get current browser state including page content, URL, and available elements.
Parameters:
- cache_clickable_elements_hashes: Cache element hashes for performance
- include_screenshot: Include screenshot in the state summary
- cached: Use cached state if available
- include_recent_events: Include recent browser events in summary
Returns:
BrowserStateSummary: Current page state with DOM elements and metadata
"""
async def get_tabs(self) -> list[TabInfo]:
"""
Get list of all browser tabs.
Returns:
list[TabInfo]: List of all available browser tabs
"""
async def get_element_by_index(self, index: int) -> EnhancedDOMTreeNode | None:
"""
Get DOM element by its index from the element mapping.
Parameters:
- index: Element index from DOM serialization
Returns:
EnhancedDOMTreeNode | None: DOM element or None if not found
"""
async def get_current_page_url(self) -> str:
"""
Get URL of currently active page.
Returns:
str: Current page URL
"""
async def get_current_page_title(self) -> str:
"""
Get title of currently active page.
Returns:
str: Current page title
"""
async def start(self) -> None:
"""Start the browser session."""
async def kill(self) -> None:
"""Terminate browser session and cleanup resources."""
async def stop(self) -> None:
"""Stop the browser session gracefully."""

Comprehensive browser configuration for customizing browser behavior, security settings, and automation parameters.
class BrowserProfile:
def __init__(
self,
headless: bool = False,
user_data_dir: str = None,
allowed_domains: list[str] = None,
downloads_path: str = None,
proxy: ProxySettings = None,
keep_alive: bool = False,
window_size: tuple[int, int] = (1920, 1080),
viewport_size: tuple[int, int] = None,
user_agent: str = None,
disable_web_security: bool = False,
disable_features: list[str] = None,
enable_features: list[str] = None,
extra_args: list[str] = None
):
"""
Configure browser behavior and settings.
Parameters:
- headless: Run browser in headless mode
- user_data_dir: Directory for browser user data
- allowed_domains: List of allowed domains (domain restriction)
- downloads_path: Directory for file downloads
- proxy: Proxy server configuration
- keep_alive: Keep browser alive after session ends
- window_size: Browser window dimensions
- viewport_size: Viewport dimensions (defaults to window_size)
- user_agent: Custom user agent string
- disable_web_security: Disable web security features
- disable_features: Chrome features to disable
- enable_features: Chrome features to enable
- extra_args: Additional Chrome command line arguments
"""
headless: bool
user_data_dir: str
allowed_domains: list[str]
downloads_path: str
proxy: ProxySettings
keep_alive: bool

Network proxy settings for browser sessions.
class ProxySettings:
def __init__(
self,
server: str,
username: str = None,
password: str = None,
bypass_list: list[str] = None
):
"""
Configure proxy settings for browser session.
Parameters:
- server: Proxy server address (e.g., "proxy.example.com:8080")
- username: Proxy authentication username
- password: Proxy authentication password
- bypass_list: List of domains to bypass proxy
"""
server: str
username: str
password: str
bypass_list: list[str]

Comprehensive browser state representation for agent decision-making.
class BrowserStateSummary:
"""
Current browser state information.
"""
url: str # Current page URL
title: str # Page title
tabs: list[TabInfo] # Available browser tabs
elements: list[ElementInfo] # Clickable/interactable elements
text_content: str # Page text content
screenshot_path: str # Path to current screenshot
viewport_size: tuple[int, int] # Viewport dimensions
class TabInfo:
"""Browser tab information."""
id: str
title: str
url: str
active: bool
class ElementInfo:
"""DOM element information."""
index: int
tag: str
text: str
attributes: dict[str, str]
bounding_box: dict[str, float]

from browser_use import BrowserSession, BrowserProfile
# Create browser session with default profile
session = BrowserSession()
# Navigate and interact
await session.navigate_to_url("https://example.com")
state = await session.get_browser_state_summary()
print(f"Page title: {state.title}")
# Cleanup
await session.kill()

from browser_use import BrowserSession, BrowserProfile, ProxySettings
# Configure proxy
proxy = ProxySettings(
server="proxy.company.com:8080",
username="user",
password="pass",
bypass_list=["*.local", "127.0.0.1"]
)
# Create custom profile
profile = BrowserProfile(
headless=True,
user_data_dir="/tmp/browser-data",
allowed_domains=["*.example.com", "*.trusted-site.org"],
downloads_path="/tmp/downloads",
proxy=proxy,
window_size=(1440, 900),
user_agent="CustomBot/1.0"
)
# Create session with profile
session = BrowserSession(browser_profile=profile)
await session.navigate_to_url("https://example.com")

from browser_use import BrowserSession
session = BrowserSession()
await session.navigate_to_url("https://example.com/form")
# Get current state
state = await session.get_browser_state_summary()
# Find search input (assuming it's element index 5)
search_input_index = 5
await session.input_text(search_input_index, "search query")
# Find and click search button (assuming it's element index 8)
search_button_index = 8
await session.click_element(search_button_index)
# Scroll down to see more results
await session.scroll(down=True, num_pages=2)

from browser_use import BrowserSession
session = BrowserSession()
# Navigate and open multiple tabs
await session.navigate_to_url("https://example.com")
state = await session.get_browser_state_summary()
# Switch between tabs
for tab in state.tabs:
print(f"Tab: {tab.title} - {tab.url}")
if not tab.active:
await session.switch_tab(tab.id)
# Do something in this tab
break
# Close unnecessary tabs
for tab in state.tabs:
if "unwanted" in tab.title.lower():
await session.close_tab(tab.id)

from browser_use import BrowserSession, BrowserProfile
# Create persistent browser session
profile = BrowserProfile(
keep_alive=True,
user_data_dir="/persistent/browser/data"
)
session = BrowserSession(browser_profile=profile)
# Use session for multiple tasks
await session.navigate_to_url("https://site1.com")
# ... perform tasks ...
await session.navigate_to_url("https://site2.com")
# ... perform more tasks ...
# Session data persists for future use

# Default browser configuration values
DEFAULT_BROWSER_PROFILE: BrowserProfile
CHROME_DEBUG_PORT: int = 9242
CHROME_DISABLED_COMPONENTS: list[str]
CHROME_HEADLESS_ARGS: list[str]
CHROME_DOCKER_ARGS: list[str]
# Screenshot and viewport limits
MAX_SCREENSHOT_HEIGHT: int = 2000
MAX_SCREENSHOT_WIDTH: int = 1920

Install with Tessl CLI
npx tessl i tessl/pypi-browser-use