tessl/pypi-browser-use

AI-powered browser automation library that enables language models to control web browsers for automated tasks


docs/llm-integration.md

LLM Integration

Multi-provider language model support with consistent interfaces for OpenAI, Anthropic, Google, Groq, Azure OpenAI, and Ollama models. All chat models implement the BaseChatModel protocol for seamless integration with browser-use agents.

Capabilities

OpenAI Integration

OpenAI GPT model integration with support for GPT-4, GPT-3.5, and other OpenAI models.

class ChatOpenAI:
    def __init__(
        self,
        model: str = "gpt-4o-mini",
        temperature: float = 0.2,
        frequency_penalty: float = 0.3,
        presence_penalty: float = 0.0,
        max_tokens: int | None = None,
        api_key: str | None = None,
        base_url: str | None = None,
        timeout: float = 60.0
    ):
        """
        Initialize OpenAI chat model.

        Parameters:
        - model: OpenAI model name (e.g., "gpt-4o", "gpt-4o-mini", "gpt-3.5-turbo")
        - temperature: Randomness in generation (0.0-2.0)
        - frequency_penalty: Penalty for frequent tokens (-2.0 to 2.0)
        - presence_penalty: Penalty for token presence (-2.0 to 2.0)
        - max_tokens: Maximum tokens in response
        - api_key: OpenAI API key (uses OPENAI_API_KEY env var if not provided)
        - base_url: Custom API base URL
        - timeout: Request timeout in seconds
        """

    model: str
    provider: str = "openai"

    async def ainvoke(
        self,
        messages: list[BaseMessage],
        output_format: type[T] | None = None
    ) -> ChatInvokeCompletion:
        """
        Invoke OpenAI model with messages.

        Parameters:
        - messages: List of conversation messages
        - output_format: Optional Pydantic model for structured output

        Returns:
        ChatInvokeCompletion: Model response with content and metadata
        """

Anthropic Integration

Anthropic Claude model integration with support for Claude 3 family models.

class ChatAnthropic:
    def __init__(
        self,
        model: str = "claude-3-sonnet-20240229",
        temperature: float = 0.2,
        max_tokens: int = 4096,
        api_key: str | None = None,
        timeout: float = 60.0
    ):
        """
        Initialize Anthropic Claude model.

        Parameters:
        - model: Claude model name (e.g., "claude-3-sonnet-20240229", "claude-3-haiku-20240307")
        - temperature: Randomness in generation (0.0-1.0)
        - max_tokens: Maximum tokens in response
        - api_key: Anthropic API key (uses ANTHROPIC_API_KEY env var if not provided)
        - timeout: Request timeout in seconds
        """

    model: str
    provider: str = "anthropic"

    async def ainvoke(
        self,
        messages: list[BaseMessage],
        output_format: type[T] | None = None
    ) -> ChatInvokeCompletion:
        """Invoke Claude model with messages."""

Google Integration

Google Gemini model integration with support for Gemini Pro and other Google models.

class ChatGoogle:
    def __init__(
        self,
        model: str = "gemini-pro",
        temperature: float = 0.2,
        max_tokens: int | None = None,
        api_key: str | None = None,
        timeout: float = 60.0
    ):
        """
        Initialize Google Gemini model.

        Parameters:
        - model: Gemini model name (e.g., "gemini-pro", "gemini-pro-vision")
        - temperature: Randomness in generation (0.0-1.0)
        - max_tokens: Maximum tokens in response
        - api_key: Google API key (uses GOOGLE_API_KEY env var if not provided)
        - timeout: Request timeout in seconds
        """

    model: str
    provider: str = "google"

    async def ainvoke(
        self,
        messages: list[BaseMessage],
        output_format: type[T] | None = None
    ) -> ChatInvokeCompletion:
        """Invoke Gemini model with messages."""

Groq Integration

Groq model integration for fast inference with Llama, Mixtral, and other supported models.

class ChatGroq:
    def __init__(
        self,
        model: str = "llama3-70b-8192",
        temperature: float = 0.2,
        max_tokens: int | None = None,
        api_key: str | None = None,
        timeout: float = 60.0
    ):
        """
        Initialize Groq model.

        Parameters:
        - model: Groq model name (e.g., "llama3-70b-8192", "mixtral-8x7b-32768")
        - temperature: Randomness in generation (0.0-2.0)
        - max_tokens: Maximum tokens in response
        - api_key: Groq API key (uses GROQ_API_KEY env var if not provided)
        - timeout: Request timeout in seconds
        """

    model: str
    provider: str = "groq"

    async def ainvoke(
        self,
        messages: list[BaseMessage],
        output_format: type[T] | None = None
    ) -> ChatInvokeCompletion:
        """Invoke Groq model with messages."""

Azure OpenAI Integration

Azure OpenAI service integration for enterprise OpenAI model deployment.

class ChatAzureOpenAI:
    def __init__(
        self,
        model: str,
        azure_endpoint: str,
        api_version: str = "2024-02-15-preview",
        temperature: float = 0.2,
        frequency_penalty: float = 0.3,
        max_tokens: int | None = None,
        api_key: str | None = None,
        timeout: float = 60.0
    ):
        """
        Initialize Azure OpenAI model.

        Parameters:
        - model: Azure deployment name
        - azure_endpoint: Azure OpenAI endpoint URL
        - api_version: Azure OpenAI API version
        - temperature: Randomness in generation (0.0-2.0)
        - frequency_penalty: Penalty for frequent tokens (-2.0 to 2.0)
        - max_tokens: Maximum tokens in response
        - api_key: Azure OpenAI API key
        - timeout: Request timeout in seconds
        """

    model: str
    provider: str = "azure_openai"

    async def ainvoke(
        self,
        messages: list[BaseMessage],
        output_format: type[T] | None = None
    ) -> ChatInvokeCompletion:
        """Invoke Azure OpenAI model with messages."""

Ollama Integration

Run models locally through an Ollama server, with no API key and no external network dependency.

class ChatOllama:
    def __init__(
        self,
        model: str = "llama2",
        temperature: float = 0.2,
        base_url: str = "http://localhost:11434",
        timeout: float = 120.0
    ):
        """
        Initialize Ollama local model.

        Parameters:
        - model: Ollama model name (e.g., "llama2", "codellama", "mistral")
        - temperature: Randomness in generation (0.0-1.0)
        - base_url: Ollama server URL
        - timeout: Request timeout in seconds
        """

    model: str
    provider: str = "ollama"

    async def ainvoke(
        self,
        messages: list[BaseMessage],
        output_format: type[T] | None = None
    ) -> ChatInvokeCompletion:
        """Invoke local Ollama model with messages."""

Base Chat Model Protocol

Protocol defining the interface that all chat models must implement.

from typing import Protocol, TypeVar
from abc import abstractmethod

T = TypeVar('T')

class BaseChatModel(Protocol):
    """Protocol for chat model implementations."""
    
    model: str
    provider: str

    @abstractmethod
    async def ainvoke(
        self,
        messages: list[BaseMessage],
        output_format: type[T] | None = None
    ) -> ChatInvokeCompletion:
        """
        Invoke the chat model with messages.

        Parameters:
        - messages: Conversation messages
        - output_format: Optional structured output format

        Returns:
        ChatInvokeCompletion: Model response
        """

Message Types

Message types for structured conversation handling.

class BaseMessage:
    """Base class for conversation messages."""
    content: str
    role: str

class SystemMessage(BaseMessage):
    """System message for model prompting."""
    role: str = "system"

class HumanMessage(BaseMessage):
    """Human/user message."""
    role: str = "user"

class AIMessage(BaseMessage):
    """AI assistant message."""
    role: str = "assistant"

class ChatInvokeCompletion:
    """Chat model response."""
    content: str
    model: str
    usage: dict[str, int]
    finish_reason: str
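The message hierarchy above can be exercised with a short self-contained sketch. These dataclasses mirror the fields shown in this section but are illustrative stand-ins, not the library's actual classes:

```python
from dataclasses import dataclass

@dataclass
class BaseMessage:
    content: str
    role: str

@dataclass
class SystemMessage(BaseMessage):
    role: str = "system"

@dataclass
class HumanMessage(BaseMessage):
    role: str = "user"

@dataclass
class AIMessage(BaseMessage):
    role: str = "assistant"

# A typical conversation: one system prompt, then alternating user/assistant turns.
conversation = [
    SystemMessage("You are a browser automation assistant."),
    HumanMessage("Open the pricing page and summarize it."),
    AIMessage("Navigating to the pricing page..."),
]

print([m.role for m in conversation])  # -> ['system', 'user', 'assistant']
```

A list shaped like `conversation` is what `ainvoke` consumes; the role field is what each provider backend maps onto its own wire format.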

Usage Examples

Basic Model Usage

from browser_use import Agent, ChatOpenAI, ChatAnthropic, ChatGoogle

# OpenAI GPT-4
agent = Agent(
    task="Search for Python tutorials",
    llm=ChatOpenAI(model="gpt-4o", temperature=0.1)
)

# Anthropic Claude
agent = Agent(
    task="Analyze web page content",
    llm=ChatAnthropic(model="claude-3-sonnet-20240229")
)

# Google Gemini
agent = Agent(
    task="Extract structured data",
    llm=ChatGoogle(model="gemini-pro")
)

Custom Model Configuration

from browser_use import ChatOpenAI, ChatGroq, ChatOllama

# Custom OpenAI configuration
openai_model = ChatOpenAI(
    model="gpt-4o",
    temperature=0.0,  # Deterministic output
    frequency_penalty=0.5,  # Reduce repetition
    max_tokens=2000,
    timeout=30.0
)

# Fast inference with Groq
groq_model = ChatGroq(
    model="llama3-70b-8192",
    temperature=0.3,
    max_tokens=4000
)

# Local model with Ollama
local_model = ChatOllama(
    model="codellama:13b",
    temperature=0.1,
    base_url="http://localhost:11434"
)

Azure OpenAI Enterprise Setup

from browser_use import ChatAzureOpenAI, Agent

# Azure OpenAI configuration
azure_model = ChatAzureOpenAI(
    model="gpt-4-deployment",  # Your Azure deployment name
    azure_endpoint="https://your-resource.openai.azure.com/",
    api_version="2024-02-15-preview",
    api_key="your-azure-api-key",
    temperature=0.2
)

agent = Agent(
    task="Enterprise browser automation task",
    llm=azure_model
)

Model Comparison Workflow

from browser_use import Agent, ChatOpenAI, ChatAnthropic, ChatGoogle

task = "Analyze this webpage and extract key information"

# Test with different models
models = [
    ChatOpenAI(model="gpt-4o"),
    ChatAnthropic(model="claude-3-sonnet-20240229"),
    ChatGoogle(model="gemini-pro")
]

results = []
for model in models:
    agent = Agent(task=task, llm=model)
    result = agent.run_sync()
    results.append({
        'provider': model.provider,
        'model': model.model,
        'result': result.final_result(),
        'success': result.is_successful()
    })

# Compare results
for result in results:
    print(f"{result['provider']}: {result['success']}")

Structured Output with Models

from browser_use import Agent, ChatOpenAI
from pydantic import BaseModel

class WebPageInfo(BaseModel):
    title: str
    main_content: str
    links: list[str]
    images: list[str]

# Model with structured output
agent = Agent(
    task="Extract structured information from webpage",
    llm=ChatOpenAI(model="gpt-4o"),
    output_model_schema=WebPageInfo
)

result = agent.run_sync()
webpage_info = result.final_result()  # Returns WebPageInfo instance
print(f"Title: {webpage_info.title}")
print(f"Links found: {len(webpage_info.links)}")

Error Handling and Fallbacks

from browser_use import Agent, ChatOpenAI, ChatAnthropic, LLMException

primary_model = ChatOpenAI(model="gpt-4o")
fallback_model = ChatAnthropic(model="claude-3-haiku-20240307")

try:
    agent = Agent(task="Complex task", llm=primary_model)
    result = agent.run_sync()
except LLMException as e:
    print(f"Primary model failed: {e}")
    # Fallback to alternative model
    agent = Agent(task="Complex task", llm=fallback_model)
    result = agent.run_sync()

Local Model Setup

from browser_use import ChatOllama, Agent

# Ensure Ollama is running: ollama serve
# Pull model: ollama pull llama2

local_model = ChatOllama(
    model="llama2:13b",
    temperature=0.1,
    base_url="http://localhost:11434"
)

agent = Agent(
    task="Local browser automation task",
    llm=local_model
)

# Works offline with local inference
result = agent.run_sync()

Model Selection Guidelines

Performance Characteristics

  • GPT-4o: Excellent reasoning, vision capabilities, reliable
  • Claude-3: Strong analysis, long context, good at following instructions
  • Gemini Pro: Good vision, fast inference, cost-effective
  • Groq: Very fast inference, good for simple tasks
  • Local (Ollama): Privacy, offline operation, no API costs

Use Case Recommendations

  • Complex reasoning: GPT-4o, Claude-3 Sonnet
  • Fast simple tasks: Groq, Gemini Pro
  • Privacy/offline: Ollama local models
  • Enterprise: Azure OpenAI
  • Cost optimization: GPT-4o-mini, Claude-3 Haiku

Configuration Best Practices

  • Use low temperature (0.0-0.3) for deterministic browser automation
  • Set appropriate timeouts for model response times
  • Configure max_tokens based on expected response length
  • Use frequency_penalty to reduce repetitive actions
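As one way to apply these guidelines, here is a hypothetical helper that maps a task profile to constructor keyword arguments. The profile names and numeric values are assumptions chosen to illustrate the bullets above, not library defaults:

```python
def automation_llm_kwargs(profile: str) -> dict:
    """Hypothetical helper: suggested constructor kwargs per task profile.

    Follows the guidelines above: low temperature for deterministic
    automation, a frequency penalty to discourage repeated actions, and
    max_tokens / timeout sized to the expected workload.
    """
    profiles = {
        # Deterministic click-through flows: no sampling randomness.
        "deterministic": dict(temperature=0.0, frequency_penalty=0.3,
                              max_tokens=1000, timeout=30.0),
        # Content extraction: slight randomness, longer responses.
        "extraction": dict(temperature=0.2, frequency_penalty=0.3,
                           max_tokens=4000, timeout=60.0),
        # Local models respond more slowly, so allow a generous timeout.
        "local": dict(temperature=0.1, timeout=120.0),
    }
    return profiles[profile]

kwargs = automation_llm_kwargs("deterministic")
print(kwargs["temperature"])  # -> 0.0
# e.g. ChatOpenAI(model="gpt-4o", **kwargs)
```

Centralizing these choices in one place keeps temperature and timeout settings consistent across agents instead of scattering magic numbers through each Agent construction.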

Install with Tessl CLI

npx tessl i tessl/pypi-browser-use
