tessl/pypi-browser-use

AI-powered browser automation library that enables language models to control web browsers for automated tasks

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Task Results and History

Name: tessl/pypi-browser-use
Author: tessl

Comprehensive result tracking, history management, and execution analysis including success/failure detection, error handling, and workflow replay capabilities. The ActionResult and AgentHistoryList classes provide detailed insight into agent task execution.

Capabilities

Action Results

Individual action execution results with comprehensive success/failure information and extracted content.

class ActionResult:
    def __init__(
        self,
        is_done: bool = None,
        success: bool = None,
        error: str = None,
        extracted_content: str = None,
        attachments: list[str] = None,
        long_term_memory: str = None,
        metadata: dict = None
    ):
        """
        Result of executing a browser action.

        Parameters:
        - is_done: Whether the overall task is complete
        - success: Whether this specific action succeeded
        - error: Error message if action failed
        - extracted_content: Content extracted from the page
        - attachments: List of file paths attached to result
        - long_term_memory: Information to store for future reference
        - metadata: Additional metadata about the action
        """

    is_done: bool = None
    success: bool = None
    error: str = None
    extracted_content: str = None
    attachments: list[str] = None
    long_term_memory: str = None
    metadata: dict = None

    def to_dict(self) -> dict:
        """Convert result to dictionary representation."""

    @classmethod
    def from_dict(cls, data: dict) -> 'ActionResult':
        """Create ActionResult from dictionary."""

    def __str__(self) -> str:
        """String representation of the result."""

Agent History Management

Comprehensive execution history with analysis methods for understanding agent behavior and task outcomes.

class AgentHistoryList:
    def __init__(self, history: list[AgentHistoryItem] = None):
        """
        List of agent execution history with analysis capabilities.

        Parameters:
        - history: List of historical execution items
        """

    def is_done(self) -> bool:
        """
        Check if the task was completed.

        Returns:
        bool: True if task is marked as done
        """

    def is_successful(self) -> bool | None:
        """
        Check if the task completed successfully.

        Returns:
        bool | None: True if successful, False if failed, None if not done
        """

    def final_result(self) -> str | None:
        """
        Get the final result text from task completion.

        Returns:
        str | None: Final result message or None if not completed
        """

    def errors(self) -> list[str | None]:
        """
        Get all error messages from execution history.

        Returns:
        list[str | None]: List of error messages
        """

    def urls(self) -> list[str | None]:
        """
        Get all URLs visited during execution.

        Returns:
        list[str | None]: List of visited URLs
        """

    def screenshots(self, n_last: int = None) -> list[str | None]:
        """
        Get screenshot paths from execution history.

        Parameters:
        - n_last: Number of recent screenshots to return (all if None)

        Returns:
        list[str | None]: List of screenshot file paths
        """

    def action_names(self) -> list[str]:
        """
        Get names of all actions executed.

        Returns:
        list[str]: List of action names in execution order
        """

    def total_duration_seconds(self) -> float:
        """
        Get total execution time in seconds.

        Returns:
        float: Total duration of task execution
        """

    def save_to_file(self, filepath: str | Path) -> None:
        """
        Save execution history to JSON file.

        Parameters:
        - filepath: Path where to save the history file
        """

    @classmethod
    def load_from_file(
        cls,
        filepath: str | Path,
        output_model: type = None
    ) -> 'AgentHistoryList':
        """
        Load execution history from JSON file.

        Parameters:
        - filepath: Path to history file
        - output_model: Optional output model type for structured data

        Returns:
        AgentHistoryList: Loaded history with analysis capabilities
        """

    def filter_by_action(self, action_name: str) -> 'AgentHistoryList':
        """
        Filter history to only include specific action type.

        Parameters:
        - action_name: Name of action to filter by

        Returns:
        AgentHistoryList: Filtered history
        """

    def filter_by_success(self, success: bool) -> 'AgentHistoryList':
        """
        Filter history by success/failure status.

        Parameters:
        - success: True for successful actions, False for failures

        Returns:
        AgentHistoryList: Filtered history
        """

    def get_statistics(self) -> ExecutionStatistics:
        """
        Get detailed execution statistics.

        Returns:
        ExecutionStatistics: Comprehensive execution metrics
        """

History Item Structure

Individual items in the execution history with detailed action information.

class AgentHistoryItem:
    """
    One entry of an agent execution history.

    Reference stub: only the field declarations and the dictionary
    conversion signatures are listed; method bodies are omitted.
    """

    timestamp: datetime
    action_name: str
    action_parameters: dict
    result: ActionResult
    browser_state: BrowserStateSummary
    screenshot_path: str
    step_number: int
    duration_seconds: float
    # LLM reasoning process
    model_thinking: str
    # Raw LLM response
    model_response: str

    def to_dict(self) -> dict:
        """Return this history item as a dictionary."""

    @classmethod
    def from_dict(cls, data: dict) -> 'AgentHistoryItem':
        """Build a history item from a dictionary."""

Execution Statistics

Detailed metrics and analysis of agent execution performance.

class ExecutionStatistics:
    """
    Execution metrics, the return type of AgentHistoryList.get_statistics().

    Reference stub: only field declarations and method signatures are
    listed; method bodies are omitted.
    """

    # Action counts and timing
    total_actions: int
    successful_actions: int
    failed_actions: int
    success_rate: float
    total_duration: float
    average_action_duration: float
    actions_by_type: dict[str, int]
    error_types: dict[str, int]
    urls_visited: list[str]
    screenshots_taken: int

    # Performance metrics
    fastest_action: float
    slowest_action: float
    most_common_action: str
    most_common_error: str

    # Task completion metrics
    steps_to_completion: int
    # success_rate / steps_to_completion
    completion_efficiency: float
    retry_count: int

    def to_dict(self) -> dict:
        """Return the statistics as a dictionary."""

    def summary_report(self) -> str:
        """Return a human-readable summary report."""

History Analysis Tools

Advanced analysis tools for understanding execution patterns and optimizing agent performance.

class HistoryAnalyzer:
    """
    Advanced analysis tools for execution history.

    Signature-only reference stub: method bodies are omitted. Return types
    that name classes declared after this one (FailurePattern,
    OptimizationSuggestion, ComparisonReport) are written as strings so they
    are not evaluated when this class is defined.
    """

    def __init__(self, history: AgentHistoryList):
        """
        Initialize analyzer with execution history.

        Parameters:
        - history: Agent execution history to analyze
        """

    def identify_failure_patterns(self) -> 'list[FailurePattern]':
        """
        Identify common failure patterns in execution.

        Returns:
        list[FailurePattern]: Common failure patterns found
        """

    def suggest_optimizations(self) -> 'list[OptimizationSuggestion]':
        """
        Suggest optimizations based on execution analysis.

        Returns:
        list[OptimizationSuggestion]: Recommended optimizations
        """

    def compare_with_baseline(
        self,
        baseline_history: AgentHistoryList
    ) -> 'ComparisonReport':
        """
        Compare current execution with baseline performance.

        Parameters:
        - baseline_history: Baseline execution history

        Returns:
        ComparisonReport: Detailed performance comparison
        """

    def generate_replay_script(self) -> str:
        """
        Generate script to replay successful execution path.

        Returns:
        str: Python script for replaying execution
        """

class FailurePattern:
    """
    A common failure pattern found in an execution.

    Element type of the list returned by
    HistoryAnalyzer.identify_failure_patterns().
    """

    pattern_type: str
    frequency: int
    actions_involved: list[str]
    common_errors: list[str]
    suggested_fix: str

class OptimizationSuggestion:
    """
    A suggested optimization for agent performance.

    Element type of the list returned by
    HistoryAnalyzer.suggest_optimizations().
    """

    # 'speed', 'reliability', 'efficiency'
    category: str
    description: str
    expected_improvement: str
    # 'easy', 'medium', 'hard'
    implementation_difficulty: str

class ComparisonReport:
    """
    Performance comparison between two executions.

    Return type of HistoryAnalyzer.compare_with_baseline().
    """

    # Percentage change
    performance_change: float
    speed_change: float
    reliability_change: float
    new_issues: list[str]
    resolved_issues: list[str]
    overall_assessment: str

Usage Examples

Basic Result Handling

from browser_use import Agent

agent = Agent(task="Search for information")
history = agent.run_sync()

# Check overall success. is_successful() is documented to return None while
# the task is not done; this example reports that case as a failure too.
if history.is_successful():
    print("Task completed successfully!")
    print(f"Final result: {history.final_result()}")
else:
    print("Task failed:")
    # errors() is typed list[str | None], so skip the empty entries
    for error in history.errors():
        if error:
            print(f"  - {error}")

# Get execution statistics
print(f"Total actions: {len(history.action_names())}")
print(f"Duration: {history.total_duration_seconds():.2f} seconds")
print(f"URLs visited: {len(history.urls())}")

Detailed History Analysis

from browser_use import Agent

agent = Agent(task="Complex multi-step task")
history = agent.run_sync()

# Get detailed statistics
stats = history.get_statistics()
print(f"Success rate: {stats.success_rate:.2%}")
print(f"Average action duration: {stats.average_action_duration:.2f}s")
print(f"Most common action: {stats.most_common_action}")

# Action breakdown: actions_by_type maps an action name to its count
print("\nActions by type:")
for action, count in stats.actions_by_type.items():
    print(f"  {action}: {count}")

# Error analysis: only printed when at least one error type was recorded
if stats.error_types:
    print("\nError types:")
    for error, count in stats.error_types.items():
        print(f"  {error}: {count}")

History Filtering and Analysis

from browser_use import Agent

history = Agent(task="Web scraping task").run_sync()

# Keep only the actions that succeeded
succeeded = history.filter_by_success(True)
succeeded_count = len(succeeded.action_names())
print(f"Successful actions: {succeeded_count}")

# Keep only the actions of one specific type
click_history = history.filter_by_action("click_element")
click_count = len(click_history.action_names())
print(f"Click actions: {click_count}")

# Fetch the five most recent screenshots
latest_shots = history.screenshots(n_last=5)
print(f"Recent screenshots: {latest_shots}")

History Persistence

from browser_use import Agent, AgentHistoryList

# Single place for the file name used by both the save and the load step
HISTORY_FILE = "execution_history.json"

# Run the task, then persist its history
history = Agent(task="Data extraction task").run_sync()
history.save_to_file(HISTORY_FILE)
print(f"History saved to {HISTORY_FILE}")

# Later: read the history back and inspect it
loaded_history = AgentHistoryList.load_from_file(HISTORY_FILE)

action_count = len(loaded_history.action_names())
print(f"Loaded history with {action_count} actions")
print(f"Task was successful: {loaded_history.is_successful()}")

# Print the summary report built from the loaded history's statistics
print(loaded_history.get_statistics().summary_report())

Advanced Pattern Analysis

from browser_use import Agent, HistoryAnalyzer

# Several similar tasks to run one after another
tasks = [
    "Search for Python tutorials",
    "Search for JavaScript guides",
    "Search for React documentation",
]

# One history per task, in the same order as `tasks`
histories = [Agent(task=task).run_sync() for task in tasks]

# Analyze each execution in turn (task numbers are 1-based in the output)
for task_number, task_history in enumerate(histories, start=1):
    print(f"\nTask {task_number} Analysis:")
    analyzer = HistoryAnalyzer(task_history)

    # Report failure patterns, if any were found
    failure_patterns = analyzer.identify_failure_patterns()
    if failure_patterns:
        print(f"  Found {len(failure_patterns)} failure patterns")
        for found in failure_patterns:
            print(f"    {found.pattern_type}: {found.frequency} occurrences")

    # Report optimization suggestions, if any were made
    optimizations = analyzer.suggest_optimizations()
    if optimizations:
        print(f"  {len(optimizations)} optimization suggestions:")
        for hint in optimizations:
            print(f"    {hint.category}: {hint.description}")

Performance Comparison

from browser_use import Agent, HistoryAnalyzer

# Baseline execution
baseline_history = Agent(task="Search task", use_thinking=True).run_sync()

# Optimized execution
optimized_history = Agent(
    task="Search task",
    use_thinking=False,
    flash_mode=True,
).run_sync()

# Compare the optimized run against the baseline
comparison = HistoryAnalyzer(optimized_history).compare_with_baseline(baseline_history)

# Each change is printed as a signed percentage with one decimal
metric_changes = (
    ("Performance", comparison.performance_change),
    ("Speed", comparison.speed_change),
    ("Reliability", comparison.reliability_change),
)
for label, change in metric_changes:
    print(f"{label} change: {change:+.1%}")
print(f"Overall assessment: {comparison.overall_assessment}")

# Issue lists are only printed when they are non-empty
issue_lists = (
    ("New issues", comparison.new_issues),
    ("Resolved issues", comparison.resolved_issues),
)
for heading, issues in issue_lists:
    if issues:
        print(f"{heading}: {issues}")

Execution Replay

import asyncio

from browser_use import Agent, AgentHistoryList, HistoryAnalyzer

# Load successful execution
history = AgentHistoryList.load_from_file("successful_execution.json")

# Generate replay script
analyzer = HistoryAnalyzer(history)
replay_script = analyzer.generate_replay_script()

# Save replay script. The encoding is explicit so the written file does not
# depend on the platform's default text encoding.
with open("replay_execution.py", "w", encoding="utf-8") as f:
    f.write(replay_script)

print("Replay script generated: replay_execution.py")


async def main() -> None:
    """Re-run the saved history with the built-in replay and report each step."""
    # Or use built-in replay functionality
    agent = Agent(task="Replay previous task")
    replayed_results = await agent.load_and_rerun("successful_execution.json")

    print(f"Replayed {len(replayed_results)} actions")
    for i, result in enumerate(replayed_results):
        print(f"  Step {i+1}: {'✓' if result.success else '✗'}")


# `await` is only valid inside a coroutine, so the replay is wrapped in
# main() and driven by an event loop. Where a loop is already running
# (e.g. a notebook), use `await main()` instead of asyncio.run().
asyncio.run(main())

Custom Result Processing

from browser_use import ActionResult

# Extra details to attach to the result
extraction_metadata = {
    "extraction_method": "css_selector",
    "data_quality": "high",
    "processing_time": 2.5,
}

# Build a custom action result
result = ActionResult(
    success=True,
    extracted_content="Important data extracted",
    attachments=["report.pdf", "data.csv"],
    long_term_memory="Remember this site requires special login",
    metadata=extraction_metadata,
)

# Print the fields of interest, one "Label: value" line each
report_lines = (
    ("Success", result.success),
    ("Content", result.extracted_content),
    ("Attachments", result.attachments),
    ("Memory", result.long_term_memory),
)
for label, value in report_lines:
    print(f"{label}: {value}")

# Round-trip through a dictionary, e.g. for storage
serialized = result.to_dict()
restored = ActionResult.from_dict(serialized)

Type Definitions

from datetime import datetime
from pathlib import Path
from typing import Optional, Any

# Type alias for backward compatibility: AgentHistory is bound to the same
# class object as AgentHistoryList, so the two names are interchangeable.
AgentHistory = AgentHistoryList

Install with Tessl CLI