AI-powered browser automation library that enables language models to control web browsers for automated tasks.

Quality: Pending — does it follow best practices?
Impact: Pending — no eval scenarios have been run.
Multi-provider language model support with consistent interfaces for OpenAI, Anthropic, Google, Groq, Azure OpenAI, and Ollama models. All chat models implement the BaseChatModel protocol for seamless integration with browser-use agents.
OpenAI GPT model integration with support for GPT-4, GPT-3.5, and other OpenAI models.
class ChatOpenAI:
def __init__(
self,
model: str = "gpt-4o-mini",
temperature: float = 0.2,
frequency_penalty: float = 0.3,
presence_penalty: float = 0.0,
max_tokens: int = None,
api_key: str = None,
base_url: str = None,
timeout: float = 60.0
):
"""
Initialize OpenAI chat model.
Parameters:
- model: OpenAI model name (e.g., "gpt-4o", "gpt-4o-mini", "gpt-3.5-turbo")
- temperature: Randomness in generation (0.0-2.0)
- frequency_penalty: Penalty for frequent tokens (-2.0 to 2.0)
- presence_penalty: Penalty for token presence (-2.0 to 2.0)
- max_tokens: Maximum tokens in response
- api_key: OpenAI API key (uses OPENAI_API_KEY env var if not provided)
- base_url: Custom API base URL
- timeout: Request timeout in seconds
"""
model: str
provider: str = "openai"
async def ainvoke(
self,
messages: list[BaseMessage],
output_format: type[T] = None
) -> ChatInvokeCompletion:
"""
Invoke OpenAI model with messages.
Parameters:
- messages: List of conversation messages
- output_format: Optional Pydantic model for structured output
Returns:
ChatInvokeCompletion: Model response with content and metadata
"""Anthropic Claude model integration with support for Claude 3 family models.
class ChatAnthropic:
def __init__(
self,
model: str = "claude-3-sonnet-20240229",
temperature: float = 0.2,
max_tokens: int = 4096,
api_key: str = None,
timeout: float = 60.0
):
"""
Initialize Anthropic Claude model.
Parameters:
- model: Claude model name (e.g., "claude-3-sonnet-20240229", "claude-3-haiku-20240307")
- temperature: Randomness in generation (0.0-1.0)
- max_tokens: Maximum tokens in response
- api_key: Anthropic API key (uses ANTHROPIC_API_KEY env var if not provided)
- timeout: Request timeout in seconds
"""
model: str
provider: str = "anthropic"
async def ainvoke(
self,
messages: list[BaseMessage],
output_format: type[T] = None
) -> ChatInvokeCompletion:
"""Invoke Claude model with messages."""Google Gemini model integration with support for Gemini Pro and other Google models.
class ChatGoogle:
def __init__(
self,
model: str = "gemini-pro",
temperature: float = 0.2,
max_tokens: int = None,
api_key: str = None,
timeout: float = 60.0
):
"""
Initialize Google Gemini model.
Parameters:
- model: Gemini model name (e.g., "gemini-pro", "gemini-pro-vision")
- temperature: Randomness in generation (0.0-1.0)
- max_tokens: Maximum tokens in response
- api_key: Google API key (uses GOOGLE_API_KEY env var if not provided)
- timeout: Request timeout in seconds
"""
model: str
provider: str = "google"
async def ainvoke(
self,
messages: list[BaseMessage],
output_format: type[T] = None
) -> ChatInvokeCompletion:
"""Invoke Gemini model with messages."""Groq model integration for fast inference with Llama, Mixtral, and other supported models.
class ChatGroq:
def __init__(
self,
model: str = "llama3-70b-8192",
temperature: float = 0.2,
max_tokens: int = None,
api_key: str = None,
timeout: float = 60.0
):
"""
Initialize Groq model.
Parameters:
- model: Groq model name (e.g., "llama3-70b-8192", "mixtral-8x7b-32768")
- temperature: Randomness in generation (0.0-2.0)
- max_tokens: Maximum tokens in response
- api_key: Groq API key (uses GROQ_API_KEY env var if not provided)
- timeout: Request timeout in seconds
"""
model: str
provider: str = "groq"
async def ainvoke(
self,
messages: list[BaseMessage],
output_format: type[T] = None
) -> ChatInvokeCompletion:
"""Invoke Groq model with messages."""Azure OpenAI service integration for enterprise OpenAI model deployment.
class ChatAzureOpenAI:
def __init__(
self,
model: str,
azure_endpoint: str,
api_version: str = "2024-02-15-preview",
temperature: float = 0.2,
frequency_penalty: float = 0.3,
max_tokens: int = None,
api_key: str = None,
timeout: float = 60.0
):
"""
Initialize Azure OpenAI model.
Parameters:
- model: Azure deployment name
- azure_endpoint: Azure OpenAI endpoint URL
- api_version: Azure OpenAI API version
- temperature: Randomness in generation (0.0-2.0)
- frequency_penalty: Penalty for frequent tokens (-2.0 to 2.0)
- max_tokens: Maximum tokens in response
- api_key: Azure OpenAI API key
- timeout: Request timeout in seconds
"""
model: str
provider: str = "azure_openai"
async def ainvoke(
self,
messages: list[BaseMessage],
output_format: type[T] = None
) -> ChatInvokeCompletion:
"""Invoke Azure OpenAI model with messages."""Local model integration using Ollama for running models locally.
class ChatOllama:
    """Local Ollama chat model for offline inference in browser-use agents.

    Exposes the ``model``/``provider`` attributes and an async ``ainvoke``
    method as required by the BaseChatModel protocol.
    """

    # Set from the constructor argument of the same name.
    model: str
    provider: str = "ollama"

    def __init__(
        self,
        model: str = "llama2",
        temperature: float = 0.2,
        base_url: str = "http://localhost:11434",
        timeout: float = 120.0,
    ):
        """
        Initialize Ollama local model.

        Parameters:
        - model: Ollama model name (e.g., "llama2", "codellama", "mistral")
        - temperature: Randomness in generation (0.0-1.0)
        - base_url: Ollama server URL
        - timeout: Request timeout in seconds (longer default for local inference)
        """
        # Store configuration; the original stub discarded these arguments.
        self.model = model
        self.temperature = temperature
        self.base_url = base_url
        self.timeout = timeout

    async def ainvoke(
        self,
        messages: "list[BaseMessage]",
        output_format: "type[T] | None" = None,
    ) -> "ChatInvokeCompletion":
        """Invoke local Ollama model with messages."""


# Protocol defining the interface that all chat models must implement.
from typing import Protocol, TypeVar
from abc import abstractmethod

# Generic type variable used for optional structured-output model classes.
T = TypeVar('T')


class BaseChatModel(Protocol):
    """Protocol for chat model implementations.

    Structural interface: any class with ``model``/``provider`` attributes
    and a matching async ``ainvoke`` satisfies it.
    """

    model: str
    provider: str

    @abstractmethod
    async def ainvoke(
        self,
        messages: "list[BaseMessage]",
        output_format: "type[T] | None" = None,
    ) -> "ChatInvokeCompletion":
        """
        Invoke the chat model with messages.

        Parameters:
        - messages: Conversation messages
        - output_format: Optional structured output format

        Returns:
        ChatInvokeCompletion: Model response
        """


# Message types for structured conversation handling.
class BaseMessage:
    """Base class for conversation messages."""

    # Text content of the message.
    content: str
    # Chat role identifier ("system", "user", or "assistant" in subclasses).
    role: str


class SystemMessage(BaseMessage):
    """System message for model prompting."""

    role: str = "system"


class HumanMessage(BaseMessage):
    """Human/user message."""

    role: str = "user"


class AIMessage(BaseMessage):
    """AI assistant message."""

    role: str = "assistant"


class ChatInvokeCompletion:
    """Chat model response."""

    content: str
    model: str
    # Token usage counts keyed by category name.
    usage: dict[str, int]
    finish_reason: str


from browser_use import Agent, ChatOpenAI, ChatAnthropic, ChatGoogle
# Example: the same task driven by different model providers.
# OpenAI GPT-4
agent = Agent(
    task="Search for Python tutorials",
    llm=ChatOpenAI(model="gpt-4o", temperature=0.1),
)

# Anthropic Claude
agent = Agent(
    task="Analyze web page content",
    llm=ChatAnthropic(model="claude-3-sonnet-20240229"),
)

# Google Gemini
agent = Agent(
    task="Extract structured data",
    llm=ChatGoogle(model="gemini-pro"),
)

from browser_use import ChatOpenAI, ChatGroq, ChatOllama
# Example: provider-specific configuration options.
# Custom OpenAI configuration
openai_model = ChatOpenAI(
    model="gpt-4o",
    temperature=0.0,  # Deterministic output
    frequency_penalty=0.5,  # Reduce repetition
    max_tokens=2000,
    timeout=30.0,
)

# Fast inference with Groq
groq_model = ChatGroq(
    model="llama3-70b-8192",
    temperature=0.3,
    max_tokens=4000,
)

# Local model with Ollama
local_model = ChatOllama(
    model="codellama:13b",
    temperature=0.1,
    base_url="http://localhost:11434",
)

from browser_use import ChatAzureOpenAI, Agent
# Example: enterprise deployment through Azure OpenAI.
# Azure OpenAI configuration
azure_model = ChatAzureOpenAI(
    model="gpt-4-deployment",  # Your Azure deployment name
    azure_endpoint="https://your-resource.openai.azure.com/",
    api_version="2024-02-15-preview",
    api_key="your-azure-api-key",
    temperature=0.2,
)
agent = Agent(
    task="Enterprise browser automation task",
    llm=azure_model,
)

from browser_use import Agent, ChatOpenAI, ChatAnthropic, ChatGoogle
# Example: run one task against several providers and compare outcomes.
task = "Analyze this webpage and extract key information"

# Test with different models
models = [
    ChatOpenAI(model="gpt-4o"),
    ChatAnthropic(model="claude-3-sonnet-20240229"),
    ChatGoogle(model="gemini-pro"),
]

results = []
for model in models:
    agent = Agent(task=task, llm=model)
    result = agent.run_sync()
    results.append({
        'provider': model.provider,
        'model': model.model,
        'result': result.final_result(),
        'success': result.is_successful(),
    })

# Compare results
for result in results:
    print(f"{result['provider']}: {result['success']}")

from browser_use import Agent, ChatOpenAI
from pydantic import BaseModel


# Example: structured extraction via a Pydantic schema.
class WebPageInfo(BaseModel):
    title: str
    main_content: str
    links: list[str]
    images: list[str]


# Model with structured output
agent = Agent(
    task="Extract structured information from webpage",
    llm=ChatOpenAI(model="gpt-4o"),
    output_model_schema=WebPageInfo,
)
result = agent.run_sync()
webpage_info = result.final_result()  # Returns WebPageInfo instance
print(f"Title: {webpage_info.title}")
print(f"Links found: {len(webpage_info.links)}")

from browser_use import Agent, ChatOpenAI, ChatAnthropic, LLMException
# Example: fall back to a second provider when the primary model errors.
primary_model = ChatOpenAI(model="gpt-4o")
fallback_model = ChatAnthropic(model="claude-3-haiku-20240307")
try:
    agent = Agent(task="Complex task", llm=primary_model)
    result = agent.run_sync()
except LLMException as e:
    print(f"Primary model failed: {e}")
    # Fallback to alternative model
    agent = Agent(task="Complex task", llm=fallback_model)
    result = agent.run_sync()

from browser_use import ChatOllama, Agent
# Example: fully local automation with an Ollama-served model.
# Ensure Ollama is running: ollama serve
# Pull model: ollama pull llama2
local_model = ChatOllama(
    model="llama2:13b",
    temperature=0.1,
    base_url="http://localhost:11434",
)
agent = Agent(
    task="Local browser automation task",
    llm=local_model,
)
# Works offline with local inference
result = agent.run_sync()

# Install with Tessl CLI
npx tessl i tessl/pypi-browser-use