AI-powered browser automation library that enables language models to control web browsers for automated tasks
npx @tessl/cli install tessl/pypi-browser-use@0.7.0
A comprehensive Python library that enables AI agents to control web browsers for automated tasks. Browser-use provides an intelligent agent framework that combines browser automation capabilities with language model integration, supports multiple LLM providers, and offers sophisticated DOM manipulation, real-time browser control, and task execution features.
pip install browser-use
import browser_use
Common patterns for agent-based automation:
from browser_use import Agent, BrowserSession, ChatOpenAI
Individual component imports:
from browser_use import (
Agent, BrowserSession, BrowserProfile, Tools,
SystemPrompt, ActionResult, AgentHistoryList,
ChatOpenAI, ChatAnthropic, ChatGoogle
)
from browser_use import Agent, ChatOpenAI
# Create an agent with a task
agent = Agent(
task="Search for weather in New York and extract the temperature",
llm=ChatOpenAI(model="gpt-4o")
)
# Run the agent task (async)
result = await agent.run()
# Run the agent task (sync)
result = agent.run_sync()
# Check if task completed successfully
if result.is_successful():
print(f"Task completed: {result.final_result()}")
else:
print(f"Task failed: {result.errors()}")
from browser_use import Agent, BrowserProfile, BrowserSession
# Custom browser configuration
profile = BrowserProfile(
headless=True,
allowed_domains=["*.google.com", "*.wikipedia.org"],
downloads_path="/tmp/downloads"
)
# Create browser session with custom profile
session = BrowserSession(browser_profile=profile)
# Agent with custom browser
agent = Agent(
task="Search Wikipedia for Python programming language",
browser_session=session
)
result = agent.run_sync()
Browser-use implements a multi-layered architecture for AI-powered browser automation:
This design enables AI agents to understand web pages visually and semantically, make intelligent decisions about interactions, and execute complex multi-step browser workflows autonomously.
Core agent functionality for task execution, including the main Agent class, execution control, history management, and task configuration options.
class Agent:
def __init__(
self,
task: str,
llm: BaseChatModel = ChatOpenAI(model='gpt-4o-mini'),
browser_session: BrowserSession = None,
tools: Tools = None,
use_vision: bool = True,
max_failures: int = 3,
**kwargs
): ...
async def run(self, max_steps: int = 100) -> AgentHistoryList: ...
def run_sync(self, max_steps: int = 100) -> AgentHistoryList: ...
async def step(self, step_info: AgentStepInfo = None) -> None: ...
Browser session creation, configuration, and control, including profile management, browser lifecycle, and basic navigation capabilities.
class BrowserSession:
async def get_browser_state_summary(self, **kwargs) -> BrowserStateSummary: ...
async def get_tabs(self) -> list[TabInfo]: ...
async def get_element_by_index(self, index: int) -> EnhancedDOMTreeNode | None: ...
async def get_current_page_url(self) -> str: ...
async def get_current_page_title(self) -> str: ...
class BrowserProfile:
def __init__(
self,
headless: bool = False,
user_data_dir: str = None,
allowed_domains: list[str] = None,
proxy: ProxySettings = None,
**kwargs
): ...
Extensible action system with built-in browser automation capabilities, including navigation, element interaction, form handling, and custom action registration.
class Tools:
def __init__(
self,
exclude_actions: list[str] = None,
output_model: type = None
): ...
async def act(
self,
action: ActionModel,
browser_session: BrowserSession,
**kwargs
) -> ActionResult: ...
# Built-in actions available
def search_google(query: str): ...
def go_to_url(url: str): ...
def click_element(index: int): ...
def input_text(index: int, text: str): ...
def scroll(down: bool, num_pages: float): ...
def done(text: str): ...
Multi-provider language model support with consistent interfaces for OpenAI, Anthropic, Google, Groq, Azure OpenAI, and Ollama models.
class ChatOpenAI:
def __init__(
self,
model: str = "gpt-4o-mini",
temperature: float = 0.2,
frequency_penalty: float = 0.3
): ...
class ChatAnthropic:
def __init__(self, model: str = "claude-3-sonnet-20240229"): ...
class ChatGoogle:
def __init__(self, model: str = "gemini-pro"): ...
Advanced DOM extraction, serialization, element indexing, and interaction capabilities for intelligent web page understanding.
class DomService:
def __init__(
self,
browser_session: BrowserSession,
cross_origin_iframes: bool = False
): ...
Comprehensive result tracking, history management, and execution analysis, including success/failure detection, error handling, and workflow replay capabilities.
class ActionResult:
is_done: bool = None
success: bool = None
error: str = None
extracted_content: str = None
attachments: list[str] = None
class AgentHistoryList:
def is_done(self) -> bool: ...
def is_successful(self) -> bool: ...
def final_result(self) -> str: ...
def errors(self) -> list[str]: ...
def save_to_file(self, filepath: str) -> None: ...
Global configuration management and exception classes for robust error handling in browser automation workflows.
from browser_use.config import CONFIG
from browser_use.exceptions import LLMException
# Configuration properties
CONFIG.BROWSER_USE_LOGGING_LEVEL
CONFIG.ANONYMIZED_TELEMETRY
CONFIG.OPENAI_API_KEY
CONFIG.ANTHROPIC_API_KEY
from typing import Protocol, TypeVar
from pydantic import BaseModel
T = TypeVar('T')
class BaseChatModel(Protocol):
model: str
provider: str
async def ainvoke(
self,
messages: list[BaseMessage],
output_format: type[T] = None
) -> ChatInvokeCompletion: ...
class AgentStructuredOutput(Protocol):
"""Base protocol for structured output models."""
pass
class TabInfo(BaseModel):
"""Browser tab information."""
url: str
title: str
target_id: str # Tab identifier
parent_target_id: str | None = None
class EnhancedDOMTreeNode(BaseModel):
"""Enhanced DOM tree node with interaction capabilities."""
tag: str
text: str | None = None
attributes: dict[str, str] = {}
index: int
class AgentState(BaseModel):
"""Agent state for advanced configuration."""
pass
class CloudSync(BaseModel):
"""Cloud synchronization service."""
pass