AI-powered browser automation library that enables language models to control web browsers for automated tasks
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Core agent functionality for autonomous browser task execution. The Agent class serves as the main orchestrator, coordinating language models, browser sessions, and action execution to complete complex web automation tasks.
The Agent class provides comprehensive configuration options for task execution, browser control, and LLM integration.
class Agent:
def __init__(
self,
task: str,
llm: BaseChatModel = ChatOpenAI(model='gpt-4o-mini'),
# Optional browser parameters
browser_profile: BrowserProfile = None,
browser_session: BrowserSession = None,
browser: BrowserSession = None, # Alias for browser_session
tools: Tools = None,
controller: Tools = None, # Alias for tools
# Initial agent run parameters
sensitive_data: dict[str, str | dict[str, str]] = None,
initial_actions: list[dict[str, dict[str, Any]]] = None,
# Cloud callbacks
register_new_step_callback: Callable = None,
register_done_callback: Callable = None,
register_external_agent_status_raise_error_callback: Callable[[], Awaitable[bool]] = None,
# Agent settings
output_model_schema: type[AgentStructuredOutput] = None,
use_vision: bool = True,
save_conversation_path: str | Path = None,
save_conversation_path_encoding: str = 'utf-8',
max_failures: int = 3,
override_system_message: str = None,
extend_system_message: str = None,
generate_gif: bool | str = False,
available_file_paths: list[str] = None,
include_attributes: list[str] = None,
max_actions_per_step: int = 10,
use_thinking: bool = True,
flash_mode: bool = False,
max_history_items: int = None,
page_extraction_llm: BaseChatModel = None,
# Advanced parameters
injected_agent_state: AgentState = None,
source: str = None,
file_system_path: str = None,
task_id: str = None,
cloud_sync: CloudSync = None,
calculate_cost: bool = False,
display_files_in_done_text: bool = True,
include_tool_call_examples: bool = False,
vision_detail_level: Literal['auto', 'low', 'high'] = 'auto',
llm_timeout: int = 90,
step_timeout: int = 120,
directly_open_url: bool = True,
include_recent_events: bool = False,
**kwargs
):
"""
Create an AI agent for browser automation tasks.
Parameters:
- task: Description of the task to be performed
- llm: Language model instance (defaults to ChatOpenAI(model='gpt-4o-mini'))
- browser_profile: Browser configuration settings
- browser_session: Existing browser session to use
- browser: Alias for browser_session parameter
- tools: Custom tools/actions registry
- controller: Alias for tools parameter
- sensitive_data: Credentials and sensitive information for the agent
- initial_actions: Actions to execute before main task
- register_new_step_callback: Callback for new step events
- register_done_callback: Callback for task completion events
- register_external_agent_status_raise_error_callback: Callback for external status checks
- output_model_schema: Schema for structured output
- use_vision: Enable vision capabilities for screenshot analysis
- save_conversation_path: Path to save conversation history
- save_conversation_path_encoding: Encoding for saved conversation files
- max_failures: Maximum consecutive failures before stopping
- override_system_message: Replace default system prompt
- extend_system_message: Add to default system prompt
- generate_gif: Generate GIF recording of agent actions
- available_file_paths: Files available to the agent
- include_attributes: DOM attributes to include in element descriptions
- max_actions_per_step: Maximum actions per execution step
- use_thinking: Enable internal reasoning mode
- flash_mode: Enable faster execution mode with reduced prompting
- max_history_items: Maximum history items to keep in memory
- page_extraction_llm: Separate LLM for page content extraction
- injected_agent_state: Pre-configured agent state for advanced usage
- source: Source identifier for tracking
- file_system_path: Path to agent file system
- task_id: Unique identifier for the task
- cloud_sync: Cloud synchronization service instance
- calculate_cost: Calculate and track API costs
- display_files_in_done_text: Show files in completion messages
- include_tool_call_examples: Include examples in tool calls
- vision_detail_level: Vision processing detail level ('auto', 'low', 'high')
- llm_timeout: LLM request timeout in seconds
- step_timeout: Step execution timeout in seconds
- directly_open_url: Open URLs directly without confirmation
- include_recent_events: Include recent browser events in context
- **kwargs: Additional configuration parameters
"""

Primary methods for running agent tasks with both asynchronous and synchronous interfaces.
async def run(self, max_steps: int = 100) -> AgentHistoryList:
"""
Execute the agent task asynchronously.
Parameters:
- max_steps: Maximum number of execution steps
Returns:
AgentHistoryList: Complete execution history with results
"""
def run_sync(self, max_steps: int = 100) -> AgentHistoryList:
"""
Execute the agent task synchronously.
Parameters:
- max_steps: Maximum number of execution steps
Returns:
AgentHistoryList: Complete execution history with results
"""

Fine-grained control over agent execution for debugging and custom workflows.
async def step(self, step_info: AgentStepInfo = None) -> None:
"""
Execute a single step of the agent task.
Parameters:
- step_info: Optional step information for context
"""
async def take_step(self, step_info: AgentStepInfo = None) -> tuple[bool, bool]:
"""
Take a step and return completion status.
Parameters:
- step_info: Optional step information for context
Returns:
tuple[bool, bool]: (is_done, is_valid)
"""

Methods for dynamic task modification and execution control.
def add_new_task(self, new_task: str) -> None:
"""
Add a new task to the agent's task list.
Parameters:
- new_task: Additional task description
"""
def pause(self) -> None:
"""Pause agent execution."""
def resume(self) -> None:
"""Resume paused agent execution."""
def stop(self) -> None:
"""Stop agent execution immediately."""

Methods for saving, loading, and managing execution history.
def save_history(self, file_path: str | Path = None) -> None:
"""
Save execution history to file.
Parameters:
- file_path: Path to save history (optional)
"""
async def load_and_rerun(
self,
history_file: str | Path = None
) -> list[ActionResult]:
"""
Load and replay execution history.
Parameters:
- history_file: Path to history file to replay
Returns:
list[ActionResult]: Results from replayed actions
"""
async def close(self) -> None:
"""Clean up resources and close connections."""

Advanced prompt engineering capabilities for customizing agent behavior.
class SystemPrompt:
def __init__(
self,
action_description: str,
max_actions_per_step: int = 10,
override_system_message: str = None,
extend_system_message: str = None,
use_thinking: bool = True,
flash_mode: bool = False
):
"""
Manage system prompts for agent behavior.
Parameters:
- action_description: Description of available actions
- max_actions_per_step: Maximum actions per step
- override_system_message: Replace default system message
- extend_system_message: Add to default system message
- use_thinking: Enable thinking mode
- flash_mode: Enable flash mode
"""
def get_system_message(self) -> SystemMessage:
"""Get formatted system prompt message."""

from browser_use import Agent, ChatOpenAI
# Simple task execution
agent = Agent(
task="Go to Google and search for 'Python programming'",
llm=ChatOpenAI(model="gpt-4o")
)
result = agent.run_sync()
print(f"Task completed: {result.is_done()}")
print(f"Final result: {result.final_result()}")

from browser_use import Agent, BrowserProfile, Tools, ChatAnthropic
# Custom browser profile
profile = BrowserProfile(
headless=False,
user_data_dir="/tmp/browser-data",
allowed_domains=["*.github.com", "*.stackoverflow.com"]
)
# Custom tools with exclusions
tools = Tools(exclude_actions=["search_google"])
# Agent with advanced configuration
agent = Agent(
task="Navigate to GitHub and find Python repositories",
llm=ChatAnthropic(model="claude-3-sonnet-20240229"),
browser_profile=profile,
tools=tools,
use_vision=True,
max_failures=5,
generate_gif=True,
extend_system_message="Be extra careful with form submissions."
)
result = await agent.run(max_steps=50)

from pydantic import BaseModel
from browser_use import Agent
class SearchResult(BaseModel):
title: str
url: str
description: str
agent = Agent(
task="Search for AI research papers and extract details",
output_model_schema=SearchResult
)
result = agent.run_sync()
structured_data = result.final_result() # Returns SearchResult instance

from browser_use import Agent
agent = Agent(task="Multi-step web scraping task")
# Execute step by step for debugging
while not agent.is_done():
await agent.step()
print(f"Current step: {agent.current_step}")
if agent.has_error():
print(f"Error: {agent.last_error}")
break
# Save progress
agent.save_history("execution_log.json")

from browser_use import Agent
agent = Agent(task="Replay previous execution")
results = await agent.load_and_rerun("execution_log.json")
for result in results:
print(f"Action: {result.action}, Success: {result.success}")

from typing import Any, Optional
from pathlib import Path
class AgentStepInfo:
"""Information context for agent step execution."""
pass
class SystemMessage:
"""Formatted system message for LLM prompting."""
content: str

Install with Tessl CLI
npx tessl i tessl/pypi-browser-use