CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-browser-use

AI-powered browser automation library that enables language models to control web browsers for automated tasks

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/browser-session.md

Browser Session Management

Browser session creation, configuration, and control for CDP-based browser automation. The BrowserSession class manages the browser lifecycle and provides access to browser state. Browser actions such as navigation, clicking, and form interaction are handled through the Tools/Actions system (see Browser Actions, browser-actions.md).

Capabilities

Browser Session Control

Core browser session management with Chrome DevTools Protocol (CDP) integration for direct browser control.

class BrowserSession:
    """
    Browser session handle: lifecycle control (start/stop/kill) and state
    access (the get_* methods).

    This listing is an API reference stub; each method shows only its
    signature and documented contract, not an implementation.
    """
    async def get_browser_state_summary(
        self,
        cache_clickable_elements_hashes: bool = True,
        include_screenshot: bool = True,
        cached: bool = False,
        include_recent_events: bool = False
    ) -> BrowserStateSummary:
        """
        Get current browser state including page content, URL, and available elements.

        Parameters:
        - cache_clickable_elements_hashes: Cache element hashes for performance (default: True)
        - include_screenshot: Include screenshot in the state summary (default: True)
        - cached: Use cached state if available (default: False)
        - include_recent_events: Include recent browser events in summary (default: False)

        Returns:
        BrowserStateSummary: Current page state with DOM elements and metadata
        """

    async def get_tabs(self) -> list[TabInfo]:
        """
        Get list of all browser tabs.

        Returns:
        list[TabInfo]: One entry per available browser tab
        """

    async def get_element_by_index(self, index: int) -> EnhancedDOMTreeNode | None:
        """
        Get DOM element by its index from the element mapping.

        Parameters:
        - index: Element index from DOM serialization
          (presumably the same value exposed as ElementInfo.index; TODO confirm)

        Returns:
        EnhancedDOMTreeNode | None: DOM element, or None if no element has that index
        """

    async def get_current_page_url(self) -> str:
        """
        Get URL of currently active page.

        Returns:
        str: Current page URL
        """

    async def get_current_page_title(self) -> str:
        """
        Get title of currently active page.

        Returns:
        str: Current page title
        """

    async def start(self) -> None:
        """Start the browser session."""

    async def kill(self) -> None:
        """
        Terminate browser session and cleanup resources.

        NOTE(review): how this differs from stop() is not specified in this
        reference; confirm before choosing between them.
        """

    async def stop(self) -> None:
        """
        Stop the browser session gracefully.

        NOTE(review): how this differs from kill() is not specified in this
        reference; confirm before choosing between them.
        """

Browser Profile Configuration

Comprehensive browser configuration for customizing browser behavior, security settings, and automation parameters.

class BrowserProfile:
    """
    Browser configuration: launch mode, storage locations, network and
    security settings, and extra Chrome flags.

    This listing is an API reference stub; it shows the constructor signature
    and the documented attributes, not an implementation.
    """

    def __init__(
        self,
        headless: bool = False,
        user_data_dir: str | None = None,
        allowed_domains: list[str] | None = None,
        downloads_path: str | None = None,
        proxy: ProxySettings | None = None,
        keep_alive: bool = False,
        window_size: tuple[int, int] = (1920, 1080),
        viewport_size: tuple[int, int] | None = None,
        user_agent: str | None = None,
        disable_web_security: bool = False,
        disable_features: list[str] | None = None,
        enable_features: list[str] | None = None,
        extra_args: list[str] | None = None
    ):
        """
        Configure browser behavior and settings.

        Every parameter is optional. Parameters whose default is None are
        annotated as `X | None` so that the None default is type-correct.

        Parameters:
        - headless: Run browser in headless mode (default: False)
        - user_data_dir: Directory for browser user data (default: None)
        - allowed_domains: List of allowed domains (domain restriction) (default: None)
        - downloads_path: Directory for file downloads (default: None)
        - proxy: Proxy server configuration (default: None)
        - keep_alive: Keep browser alive after session ends (default: False)
        - window_size: Browser window dimensions (default: (1920, 1080))
        - viewport_size: Viewport dimensions (defaults to window_size)
        - user_agent: Custom user agent string (default: None)
        - disable_web_security: Disable web security features (default: False)
        - disable_features: Chrome features to disable (default: None)
        - enable_features: Chrome features to enable (default: None)
        - extra_args: Additional Chrome command line arguments (default: None)
        """

    # Documented attributes; None-defaulted settings are optional here too.
    headless: bool
    user_data_dir: str | None
    allowed_domains: list[str] | None
    downloads_path: str | None
    proxy: ProxySettings | None
    keep_alive: bool

Proxy Configuration

Network proxy settings for browser sessions.

class ProxySettings:
    def __init__(
        self,
        server: str,
        username: str = None,
        password: str = None,
        bypass_list: list[str] = None
    ):
        """
        Configure proxy settings for browser session.

        Parameters:
        - server: Proxy server address (e.g., "proxy.example.com:8080")
        - username: Proxy authentication username
        - password: Proxy authentication password
        - bypass_list: List of domains to bypass proxy
        """

    server: str
    username: str
    password: str
    bypass_list: list[str]

Browser State Information

Comprehensive browser state representation for agent decision-making.

class BrowserStateSummary:
    """
    Current browser state information.

    Returned by BrowserSession.get_browser_state_summary(). Only the field
    names and types are listed here (reference stub).
    """
    url: str  # Current page URL
    title: str  # Page title
    tabs: list[TabInfo]  # Available browser tabs
    elements: list[ElementInfo]  # Clickable/interactable elements
    text_content: str  # Page text content
    # NOTE(review): the value when no screenshot is taken
    # (include_screenshot=False) is not specified here; confirm.
    screenshot_path: str  # Path to current screenshot
    viewport_size: tuple[int, int]  # Viewport dimensions

class TabInfo:
    """Browser tab information."""
    id: str  # Tab identifier
    title: str  # Tab's page title
    url: str  # Tab's page URL
    active: bool  # presumably True for the currently focused tab; TODO confirm

class ElementInfo:
    """DOM element information."""
    index: int  # Element index (presumably what get_element_by_index accepts; TODO confirm)
    tag: str  # Element tag
    text: str  # Element text
    attributes: dict[str, str]  # Attribute name -> attribute value
    bounding_box: dict[str, float]  # key names (e.g. x/y/width/height) are not specified here; TODO confirm

Usage Examples

Basic Browser Session

import asyncio

from browser_use import BrowserSession, BrowserProfile


async def main() -> None:
    """Open a page in a default-profile session and print its title."""
    # Create browser session with default profile
    session = BrowserSession()
    try:
        # Navigate and interact
        # NOTE(review): navigate_to_url is not among the BrowserSession methods
        # listed in this reference; confirm it exists, or go through the
        # Tools/Actions system instead.
        await session.navigate_to_url("https://example.com")
        state = await session.get_browser_state_summary()
        print(f"Page title: {state.title}")
    finally:
        # Cleanup runs even if navigation or the state query raises.
        await session.kill()


# `await` is only valid inside a coroutine, so the example is driven by asyncio.run.
if __name__ == "__main__":
    asyncio.run(main())

Custom Browser Profile

import asyncio

from browser_use import BrowserSession, BrowserProfile, ProxySettings


async def main() -> None:
    """Start a session that uses a proxy and a customized browser profile."""
    # Configure proxy
    proxy = ProxySettings(
        server="proxy.company.com:8080",
        username="user",
        password="pass",
        bypass_list=["*.local", "127.0.0.1"]
    )

    # Create custom profile
    profile = BrowserProfile(
        headless=True,
        user_data_dir="/tmp/browser-data",
        allowed_domains=["*.example.com", "*.trusted-site.org"],
        downloads_path="/tmp/downloads",
        proxy=proxy,
        window_size=(1440, 900),
        user_agent="CustomBot/1.0"
    )

    # Create session with profile
    session = BrowserSession(browser_profile=profile)

    # NOTE(review): navigate_to_url is not among the BrowserSession methods
    # listed in this reference; confirm it exists, or go through the
    # Tools/Actions system instead.
    await session.navigate_to_url("https://example.com")


# `await` is only valid inside a coroutine, so the example is driven by asyncio.run.
if __name__ == "__main__":
    asyncio.run(main())

Element Interaction

import asyncio

from browser_use import BrowserSession


async def main() -> None:
    """Fill in a search box, submit it, and scroll through the results."""
    session = BrowserSession()
    # NOTE(review): navigate_to_url, input_text, click_element and scroll are
    # not among the BrowserSession methods listed in this reference; confirm
    # they exist, or go through the Tools/Actions system instead.
    await session.navigate_to_url("https://example.com/form")

    # Get current state; state.elements lists the interactable elements and
    # their indices, which is where the real values below would come from.
    state = await session.get_browser_state_summary()

    # Find search input (assuming it's element index 5)
    search_input_index = 5
    await session.input_text(search_input_index, "search query")

    # Find and click search button (assuming it's element index 8)
    search_button_index = 8
    await session.click_element(search_button_index)

    # Scroll down to see more results
    await session.scroll(down=True, num_pages=2)


# `await` is only valid inside a coroutine, so the example is driven by asyncio.run.
if __name__ == "__main__":
    asyncio.run(main())

Multi-Tab Management

import asyncio

from browser_use import BrowserSession


async def main() -> None:
    """List the open tabs, switch to an inactive one, and close unwanted tabs."""
    session = BrowserSession()

    # Navigate and open multiple tabs
    # NOTE(review): navigate_to_url, switch_tab and close_tab are not among the
    # BrowserSession methods listed in this reference; confirm they exist, or
    # go through the Tools/Actions system instead.
    await session.navigate_to_url("https://example.com")
    state = await session.get_browser_state_summary()

    # Switch between tabs
    for tab in state.tabs:
        print(f"Tab: {tab.title} - {tab.url}")
        if not tab.active:
            await session.switch_tab(tab.id)
            # Do something in this tab
            break

    # Close unnecessary tabs
    # (iterates the tab list captured before the switch above)
    for tab in state.tabs:
        if "unwanted" in tab.title.lower():
            await session.close_tab(tab.id)


# `await` is only valid inside a coroutine, so the example is driven by asyncio.run.
if __name__ == "__main__":
    asyncio.run(main())

Session Persistence

import asyncio

from browser_use import BrowserSession, BrowserProfile


async def main() -> None:
    """Reuse one keep-alive session, backed by a fixed data directory, across tasks."""
    # Create persistent browser session
    profile = BrowserProfile(
        keep_alive=True,
        user_data_dir="/persistent/browser/data"
    )

    session = BrowserSession(browser_profile=profile)

    # Use session for multiple tasks
    # NOTE(review): navigate_to_url is not among the BrowserSession methods
    # listed in this reference; confirm it exists, or go through the
    # Tools/Actions system instead.
    await session.navigate_to_url("https://site1.com")
    # ... perform tasks ...

    await session.navigate_to_url("https://site2.com")
    # ... perform more tasks ...

    # Session data persists for future use; the session is deliberately not
    # killed here.


# `await` is only valid inside a coroutine, so the example is driven by asyncio.run.
if __name__ == "__main__":
    asyncio.run(main())

Browser Constants

# Default browser configuration values
# Entries without "= value" only declare a name and its type; their values are
# not shown in this reference.
DEFAULT_BROWSER_PROFILE: BrowserProfile
CHROME_DEBUG_PORT: int = 9242  # presumably the Chrome remote-debugging (CDP) port; TODO confirm
CHROME_DISABLED_COMPONENTS: list[str]
CHROME_HEADLESS_ARGS: list[str]
CHROME_DOCKER_ARGS: list[str]

# Screenshot and viewport limits
# NOTE(review): units are presumably pixels; confirm.
MAX_SCREENSHOT_HEIGHT: int = 2000
MAX_SCREENSHOT_WIDTH: int = 1920

Install with Tessl CLI

npx tessl i tessl/pypi-browser-use

docs

agent-orchestration.md

browser-actions.md

browser-session.md

dom-processing.md

index.md

llm-integration.md

task-results.md

tile.json