tessl/pypi-browser-use

AI-powered browser automation library that enables language models to control web browsers for automated tasks

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Browser Session Management

Name: tessl/pypi-browser-use
Author: tessl

Browser session creation, configuration, and control for CDP-based browser automation. The BrowserSession class manages browser lifecycle and provides state access capabilities. Browser actions like navigation, clicking, and form interaction are handled through the Tools/Actions system (see Browser Actions).

Capabilities

Browser Session Control

Core browser session management with Chrome DevTools Protocol (CDP) integration for direct browser control.

class BrowserSession:
    async def get_browser_state_summary(
        self,
        cache_clickable_elements_hashes: bool = True,
        include_screenshot: bool = True,
        cached: bool = False,
        include_recent_events: bool = False,
    ) -> BrowserStateSummary:
        """
        Summarize the current browser state: page content, URL, and available elements.

        Parameters:
        - cache_clickable_elements_hashes: Whether to cache element hashes for performance
        - include_screenshot: Whether the summary should include a screenshot
        - cached: Whether to use a cached state when one is available
        - include_recent_events: Whether the summary should include recent browser events

        Returns:
        BrowserStateSummary: The current page state, with DOM elements and metadata
        """
        ...

    async def get_tabs(self) -> list[TabInfo]:
        """
        List every browser tab.

        Returns:
        list[TabInfo]: All available browser tabs
        """
        ...

    async def get_element_by_index(self, index: int) -> EnhancedDOMTreeNode | None:
        """
        Look up a DOM element in the element mapping by its index.

        Parameters:
        - index: The element's index, as assigned by DOM serialization

        Returns:
        EnhancedDOMTreeNode | None: The matching DOM element, or None when there is no match
        """
        ...

    async def get_current_page_url(self) -> str:
        """
        Report the URL of the page that is currently active.

        Returns:
        str: URL of the current page
        """
        ...

    async def get_current_page_title(self) -> str:
        """
        Report the title of the page that is currently active.

        Returns:
        str: Title of the current page
        """
        ...

    async def start(self) -> None:
        """Start the browser session."""
        ...

    async def kill(self) -> None:
        """Terminate the browser session and clean up its resources."""
        ...

    async def stop(self) -> None:
        """Stop the browser session gracefully."""
        ...

Browser Profile Configuration

Comprehensive browser configuration for customizing browser behavior, security settings, and automation parameters.

class BrowserProfile:
    def __init__(
        self,
        headless: bool = False,
        user_data_dir: str | None = None,
        allowed_domains: list[str] | None = None,
        downloads_path: str | None = None,
        proxy: ProxySettings | None = None,
        keep_alive: bool = False,
        window_size: tuple[int, int] = (1920, 1080),
        viewport_size: tuple[int, int] | None = None,
        user_agent: str | None = None,
        disable_web_security: bool = False,
        disable_features: list[str] | None = None,
        enable_features: list[str] | None = None,
        extra_args: list[str] | None = None,
    ) -> None:
        """
        Configure browser behavior and settings.

        Every parameter is optional. Parameters whose default is None are
        annotated ``... | None`` explicitly, so the signature is accurate for
        type checkers (implicit Optional is not allowed by PEP 484).

        Parameters:
        - headless: Run browser in headless mode
        - user_data_dir: Directory for browser user data
        - allowed_domains: List of allowed domains (domain restriction)
        - downloads_path: Directory for file downloads
        - proxy: Proxy server configuration
        - keep_alive: Keep browser alive after session ends
        - window_size: Browser window dimensions
        - viewport_size: Viewport dimensions (defaults to window_size)
        - user_agent: Custom user agent string
        - disable_web_security: Disable web security features
        - disable_features: Chrome features to disable
        - enable_features: Chrome features to enable
        - extra_args: Additional Chrome command line arguments
        """

    # Attribute types mirror the constructor parameters of the same name,
    # including None for the ones that default to None.
    headless: bool
    user_data_dir: str | None
    allowed_domains: list[str] | None
    downloads_path: str | None
    proxy: ProxySettings | None
    keep_alive: bool

Proxy Configuration

Network proxy settings for browser sessions.

class ProxySettings:
    def __init__(
        self,
        server: str,
        username: str = None,
        password: str = None,
        bypass_list: list[str] = None
    ):
        """
        Configure proxy settings for browser session.

        Parameters:
        - server: Proxy server address (e.g., "proxy.example.com:8080")
        - username: Proxy authentication username
        - password: Proxy authentication password
        - bypass_list: List of domains to bypass proxy
        """

    server: str
    username: str
    password: str
    bypass_list: list[str]

Browser State Information

Comprehensive browser state representation for agent decision-making.

class BrowserStateSummary:
    """
    Snapshot of the current browser state.

    This is the return type of BrowserSession.get_browser_state_summary().
    """
    # URL of the current page
    url: str
    # Title of the current page
    title: str
    # Browser tabs that are available
    tabs: list[TabInfo]
    # Elements that can be clicked or otherwise interacted with
    elements: list[ElementInfo]
    # Text content of the page
    text_content: str
    # Path to the current screenshot
    screenshot_path: str
    # Dimensions of the viewport
    # NOTE(review): presumably (width, height), matching window_size — confirm
    viewport_size: tuple[int, int]

class TabInfo:
    """
    Information describing a browser tab.

    Instances are returned by BrowserSession.get_tabs() and appear in
    BrowserStateSummary.tabs.
    """
    # NOTE(review): presumably the identifier used to address this tab — confirm
    id: str
    title: str
    url: str
    # NOTE(review): presumably True for the currently selected tab — confirm
    active: bool

class ElementInfo:
    """
    Information describing a DOM element.

    Instances appear in BrowserStateSummary.elements.
    """
    # NOTE(review): presumably the same index accepted by
    # BrowserSession.get_element_by_index() — confirm
    index: int
    tag: str
    text: str
    attributes: dict[str, str]
    # NOTE(review): the dictionary keys are not specified in this document — confirm
    bounding_box: dict[str, float]

Usage Examples

Basic Browser Session

import asyncio

from browser_use import BrowserSession, BrowserProfile


async def main() -> None:
    """Open a page with the default profile and print its title."""
    # Create browser session with default profile
    session = BrowserSession()
    await session.start()
    try:
        # Navigate and interact
        # NOTE(review): navigate_to_url is not among the BrowserSession methods
        # listed in this document, which says navigation is handled through
        # the Tools/Actions system — confirm against the installed version.
        await session.navigate_to_url("https://example.com")
        state = await session.get_browser_state_summary()
        print(f"Page title: {state.title}")
    finally:
        # Cleanup runs even if navigation or state retrieval raises
        await session.kill()


# `await` is only valid inside a coroutine, so drive the example with asyncio.run()
if __name__ == "__main__":
    asyncio.run(main())

Custom Browser Profile

import asyncio

from browser_use import BrowserSession, BrowserProfile, ProxySettings


async def main() -> None:
    """Open a page through a proxy using a customized browser profile."""
    # Configure proxy
    proxy = ProxySettings(
        server="proxy.company.com:8080",
        username="user",
        password="pass",
        bypass_list=["*.local", "127.0.0.1"],
    )

    # Create custom profile
    profile = BrowserProfile(
        headless=True,
        user_data_dir="/tmp/browser-data",
        allowed_domains=["*.example.com", "*.trusted-site.org"],
        downloads_path="/tmp/downloads",
        proxy=proxy,
        window_size=(1440, 900),
        user_agent="CustomBot/1.0",
    )

    # Create session with profile
    session = BrowserSession(browser_profile=profile)
    await session.start()
    try:
        # NOTE(review): navigate_to_url is not among the BrowserSession methods
        # listed in this document, which says navigation is handled through
        # the Tools/Actions system — confirm against the installed version.
        await session.navigate_to_url("https://example.com")
    finally:
        # Release the browser even if navigation raises
        await session.kill()


# `await` is only valid inside a coroutine, so drive the example with asyncio.run()
if __name__ == "__main__":
    asyncio.run(main())

Element Interaction

import asyncio

from browser_use import BrowserSession


async def main() -> None:
    """Type into a search box, click the search button, then scroll the page."""
    session = BrowserSession()
    await session.start()
    try:
        # NOTE(review): navigate_to_url, input_text, click_element and scroll
        # are not among the BrowserSession methods listed in this document,
        # which says such actions are handled through the Tools/Actions
        # system — confirm against the installed version.
        await session.navigate_to_url("https://example.com/form")

        # Get current state
        # NOTE(review): the result is not used below; presumably this call is
        # what makes the element indices available — confirm.
        state = await session.get_browser_state_summary()

        # Find search input (assuming it's element index 5)
        search_input_index = 5
        await session.input_text(search_input_index, "search query")

        # Find and click search button (assuming it's element index 8)
        search_button_index = 8
        await session.click_element(search_button_index)

        # Scroll down to see more results
        await session.scroll(down=True, num_pages=2)
    finally:
        # Release the browser even if one of the interactions raises
        await session.kill()


# `await` is only valid inside a coroutine, so drive the example with asyncio.run()
if __name__ == "__main__":
    asyncio.run(main())

Multi-Tab Management

import asyncio

from browser_use import BrowserSession


async def main() -> None:
    """List the open tabs, switch to an inactive one, and close unwanted tabs."""
    session = BrowserSession()
    await session.start()
    try:
        # Navigate and open multiple tabs
        # NOTE(review): navigate_to_url, switch_tab and close_tab are not among
        # the BrowserSession methods listed in this document — confirm against
        # the installed version.
        await session.navigate_to_url("https://example.com")
        state = await session.get_browser_state_summary()

        # Switch between tabs
        for tab in state.tabs:
            print(f"Tab: {tab.title} - {tab.url}")
            if not tab.active:
                await session.switch_tab(tab.id)
                # Do something in this tab
                break

        # Close unnecessary tabs
        # state.tabs is the snapshot taken before switching; it is not refreshed here.
        for tab in state.tabs:
            if "unwanted" in tab.title.lower():
                await session.close_tab(tab.id)
    finally:
        # Release the browser even if a tab operation raises
        await session.kill()


# `await` is only valid inside a coroutine, so drive the example with asyncio.run()
if __name__ == "__main__":
    asyncio.run(main())

Session Persistence

import asyncio

from browser_use import BrowserSession, BrowserProfile


async def main() -> None:
    """Reuse one persistent browser session for tasks on several sites."""
    # Create persistent browser session
    profile = BrowserProfile(
        keep_alive=True,
        user_data_dir="/persistent/browser/data",
    )

    session = BrowserSession(browser_profile=profile)
    await session.start()

    # Use session for multiple tasks
    # NOTE(review): navigate_to_url is not among the BrowserSession methods
    # listed in this document, which says navigation is handled through the
    # Tools/Actions system — confirm against the installed version.
    await session.navigate_to_url("https://site1.com")
    # ... perform tasks ...

    await session.navigate_to_url("https://site2.com")
    # ... perform more tasks ...

    # Session data persists for future use.
    # The session is deliberately not killed: keep_alive=True asks for the
    # browser to stay alive after the session ends.


# `await` is only valid inside a coroutine, so drive the example with asyncio.run()
if __name__ == "__main__":
    asyncio.run(main())

Browser Constants

# Default browser configuration values.
# Only CHROME_DEBUG_PORT has its value shown here; the other names in this
# group are declared with their types only.
DEFAULT_BROWSER_PROFILE: BrowserProfile
CHROME_DEBUG_PORT: int = 9242
CHROME_DISABLED_COMPONENTS: list[str]
CHROME_HEADLESS_ARGS: list[str]
CHROME_DOCKER_ARGS: list[str]

# Screenshot and viewport limits
# NOTE(review): presumably measured in pixels — confirm
MAX_SCREENSHOT_HEIGHT: int = 2000
MAX_SCREENSHOT_WIDTH: int = 1920

Install with Tessl CLI