or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agent-orchestration.md · browser-actions.md · browser-session.md · dom-processing.md · index.md · llm-integration.md · task-results.md

docs/task-results.md

0

# Task Results and History

1

2

Comprehensive result tracking, history management, and execution analysis, including success/failure detection, error handling, and workflow replay capabilities. The ActionResult and AgentHistoryList classes provide detailed insight into agent task execution.

3

4

## Capabilities

5

6

### Action Results

7

8

Individual action execution results with comprehensive success/failure information and extracted content.

9

10

```python { .api }

11

class ActionResult:

12

def __init__(

13

self,

14

is_done: bool = None,

15

success: bool = None,

16

error: str = None,

17

extracted_content: str = None,

18

attachments: list[str] = None,

19

long_term_memory: str = None,

20

metadata: dict = None

21

):

22

"""

23

Result of executing a browser action.

24

25

Parameters:

26

- is_done: Whether the overall task is complete

27

- success: Whether this specific action succeeded

28

- error: Error message if action failed

29

- extracted_content: Content extracted from the page

30

- attachments: List of file paths attached to result

31

- long_term_memory: Information to store for future reference

32

- metadata: Additional metadata about the action

33

"""

34

35

is_done: bool = None

36

success: bool = None

37

error: str = None

38

extracted_content: str = None

39

attachments: list[str] = None

40

long_term_memory: str = None

41

metadata: dict = None

42

43

def to_dict(self) -> dict:

44

"""Convert result to dictionary representation."""

45

46

@classmethod

47

def from_dict(cls, data: dict) -> 'ActionResult':

48

"""Create ActionResult from dictionary."""

49

50

def __str__(self) -> str:

51

"""String representation of the result."""

52

```

53

54

### Agent History Management

55

56

Comprehensive execution history with analysis methods for understanding agent behavior and task outcomes.

57

58

```python { .api }

59

class AgentHistoryList:

60

def __init__(self, history: list[AgentHistoryItem] = None):

61

"""

62

List of agent execution history with analysis capabilities.

63

64

Parameters:

65

- history: List of historical execution items

66

"""

67

68

def is_done(self) -> bool:

69

"""

70

Check if the task was completed.

71

72

Returns:

73

bool: True if task is marked as done

74

"""

75

76

def is_successful(self) -> bool | None:

77

"""

78

Check if the task completed successfully.

79

80

Returns:

81

bool | None: True if successful, False if failed, None if not done

82

"""

83

84

def final_result(self) -> str | None:

85

"""

86

Get the final result text from task completion.

87

88

Returns:

89

str | None: Final result message or None if not completed

90

"""

91

92

def errors(self) -> list[str | None]:

93

"""

94

Get all error messages from execution history.

95

96

Returns:

97

list[str | None]: List of error messages

98

"""

99

100

def urls(self) -> list[str | None]:

101

"""

102

Get all URLs visited during execution.

103

104

Returns:

105

list[str | None]: List of visited URLs

106

"""

107

108

def screenshots(self, n_last: int = None) -> list[str | None]:

109

"""

110

Get screenshot paths from execution history.

111

112

Parameters:

113

- n_last: Number of recent screenshots to return (all if None)

114

115

Returns:

116

list[str | None]: List of screenshot file paths

117

"""

118

119

def action_names(self) -> list[str]:

120

"""

121

Get names of all actions executed.

122

123

Returns:

124

list[str]: List of action names in execution order

125

"""

126

127

def total_duration_seconds(self) -> float:

128

"""

129

Get total execution time in seconds.

130

131

Returns:

132

float: Total duration of task execution

133

"""

134

135

def save_to_file(self, filepath: str | Path) -> None:

136

"""

137

Save execution history to JSON file.

138

139

Parameters:

140

- filepath: Path where to save the history file

141

"""

142

143

@classmethod

144

def load_from_file(

145

cls,

146

filepath: str | Path,

147

output_model: type = None

148

) -> 'AgentHistoryList':

149

"""

150

Load execution history from JSON file.

151

152

Parameters:

153

- filepath: Path to history file

154

- output_model: Optional output model type for structured data

155

156

Returns:

157

AgentHistoryList: Loaded history with analysis capabilities

158

"""

159

160

def filter_by_action(self, action_name: str) -> 'AgentHistoryList':

161

"""

162

Filter history to include only a specific action type.

163

164

Parameters:

165

- action_name: Name of action to filter by

166

167

Returns:

168

AgentHistoryList: Filtered history

169

"""

170

171

def filter_by_success(self, success: bool) -> 'AgentHistoryList':

172

"""

173

Filter history by success/failure status.

174

175

Parameters:

176

- success: True for successful actions, False for failures

177

178

Returns:

179

AgentHistoryList: Filtered history

180

"""

181

182

def get_statistics(self) -> ExecutionStatistics:

183

"""

184

Get detailed execution statistics.

185

186

Returns:

187

ExecutionStatistics: Comprehensive execution metrics

188

"""

189

```

190

191

### History Item Structure

192

193

Individual items in the execution history with detailed action information.

194

195

```python { .api }

196

class AgentHistoryItem:

197

"""Individual item in agent execution history."""

198

199

timestamp: datetime

200

action_name: str

201

action_parameters: dict

202

result: ActionResult

203

browser_state: BrowserStateSummary

204

screenshot_path: str

205

step_number: int

206

duration_seconds: float

207

model_thinking: str # LLM reasoning process

208

model_response: str # Raw LLM response

209

210

def to_dict(self) -> dict:

211

"""Convert history item to dictionary."""

212

213

@classmethod

214

def from_dict(cls, data: dict) -> 'AgentHistoryItem':

215

"""Create history item from dictionary."""

216

```

217

218

### Execution Statistics

219

220

Detailed metrics and analysis of agent execution performance.

221

222

```python { .api }

223

class ExecutionStatistics:

224

"""Comprehensive execution statistics."""

225

226

total_actions: int

227

successful_actions: int

228

failed_actions: int

229

success_rate: float

230

total_duration: float

231

average_action_duration: float

232

actions_by_type: dict[str, int]

233

error_types: dict[str, int]

234

urls_visited: list[str]

235

screenshots_taken: int

236

237

# Performance metrics

238

fastest_action: float

239

slowest_action: float

240

most_common_action: str

241

most_common_error: str

242

243

# Task completion metrics

244

steps_to_completion: int

245

completion_efficiency: float # success_rate / steps_to_completion

246

retry_count: int

247

248

def to_dict(self) -> dict:

249

"""Convert statistics to dictionary."""

250

251

def summary_report(self) -> str:

252

"""Generate human-readable summary report."""

253

```

254

255

### History Analysis Tools

256

257

Advanced analysis tools for understanding execution patterns and optimizing agent performance.

258

259

```python { .api }

260

class HistoryAnalyzer:

261

"""Advanced analysis tools for execution history."""

262

263

def __init__(self, history: AgentHistoryList):

264

"""

265

Initialize analyzer with execution history.

266

267

Parameters:

268

- history: Agent execution history to analyze

269

"""

270

271

def identify_failure_patterns(self) -> list[FailurePattern]:

272

"""

273

Identify common failure patterns in execution.

274

275

Returns:

276

list[FailurePattern]: Common failure patterns found

277

"""

278

279

def suggest_optimizations(self) -> list[OptimizationSuggestion]:

280

"""

281

Suggest optimizations based on execution analysis.

282

283

Returns:

284

list[OptimizationSuggestion]: Recommended optimizations

285

"""

286

287

def compare_with_baseline(

288

self,

289

baseline_history: AgentHistoryList

290

) -> ComparisonReport:

291

"""

292

Compare current execution with baseline performance.

293

294

Parameters:

295

- baseline_history: Baseline execution history

296

297

Returns:

298

ComparisonReport: Detailed performance comparison

299

"""

300

301

def generate_replay_script(self) -> str:

302

"""

303

Generate script to replay successful execution path.

304

305

Returns:

306

str: Python script for replaying execution

307

"""

308

309

class FailurePattern:

310

"""Common failure pattern in execution."""

311

pattern_type: str

312

frequency: int

313

actions_involved: list[str]

314

common_errors: list[str]

315

suggested_fix: str

316

317

class OptimizationSuggestion:

318

"""Suggested optimization for agent performance."""

319

category: str # 'speed', 'reliability', 'efficiency'

320

description: str

321

expected_improvement: str

322

implementation_difficulty: str # 'easy', 'medium', 'hard'

323

324

class ComparisonReport:

325

"""Performance comparison between executions."""

326

performance_change: float # Percentage change

327

speed_change: float

328

reliability_change: float

329

new_issues: list[str]

330

resolved_issues: list[str]

331

overall_assessment: str

332

```

333

334

## Usage Examples

335

336

### Basic Result Handling

337

338

```python

339

from browser_use import Agent, ActionResult

340

341

agent = Agent(task="Search for information")

342

history = agent.run_sync()

343

344

# Check overall success

345

if history.is_successful():

346

print(f"Task completed successfully!")

347

print(f"Final result: {history.final_result()}")

348

else:

349

print("Task failed:")

350

for error in history.errors():

351

if error:

352

print(f" - {error}")

353

354

# Get execution statistics

355

print(f"Total actions: {len(history.action_names())}")

356

print(f"Duration: {history.total_duration_seconds():.2f} seconds")

357

print(f"URLs visited: {len(history.urls())}")

358

```

359

360

### Detailed History Analysis

361

362

```python

363

from browser_use import Agent, HistoryAnalyzer

364

365

agent = Agent(task="Complex multi-step task")

366

history = agent.run_sync()

367

368

# Get detailed statistics

369

stats = history.get_statistics()

370

print(f"Success rate: {stats.success_rate:.2%}")

371

print(f"Average action duration: {stats.average_action_duration:.2f}s")

372

print(f"Most common action: {stats.most_common_action}")

373

374

# Action breakdown

375

print("\nActions by type:")

376

for action, count in stats.actions_by_type.items():

377

print(f" {action}: {count}")

378

379

# Error analysis

380

if stats.error_types:

381

print("\nError types:")

382

for error, count in stats.error_types.items():

383

print(f" {error}: {count}")

384

```

385

386

### History Filtering and Analysis

387

388

```python

389

from browser_use import Agent

390

391

agent = Agent(task="Web scraping task")

392

history = agent.run_sync()

393

394

# Filter successful actions only

395

successful_actions = history.filter_by_success(True)

396

print(f"Successful actions: {len(successful_actions.action_names())}")

397

398

# Filter by specific action type

399

clicks = history.filter_by_action("click_element")

400

print(f"Click actions: {len(clicks.action_names())}")

401

402

# Get recent screenshots

403

recent_screenshots = history.screenshots(n_last=5)

404

print(f"Recent screenshots: {recent_screenshots}")

405

```

406

407

### History Persistence

408

409

```python

410

from browser_use import Agent, AgentHistoryList

411

412

# Run task and save history

413

agent = Agent(task="Data extraction task")

414

history = agent.run_sync()

415

416

# Save to file

417

history.save_to_file("execution_history.json")

418

print("History saved to execution_history.json")

419

420

# Later: load and analyze

421

loaded_history = AgentHistoryList.load_from_file("execution_history.json")

422

423

print(f"Loaded history with {len(loaded_history.action_names())} actions")

424

print(f"Task was successful: {loaded_history.is_successful()}")

425

426

# Generate summary report

427

stats = loaded_history.get_statistics()

428

print(stats.summary_report())

429

```

430

431

### Advanced Pattern Analysis

432

433

```python

434

from browser_use import Agent, HistoryAnalyzer

435

436

# Run multiple similar tasks

437

tasks = [

438

"Search for Python tutorials",

439

"Search for JavaScript guides",

440

"Search for React documentation"

441

]

442

443

histories = []

444

for task in tasks:

445

agent = Agent(task=task)

446

history = agent.run_sync()

447

histories.append(history)

448

449

# Analyze patterns across executions

450

for i, history in enumerate(histories):

451

print(f"\nTask {i+1} Analysis:")

452

analyzer = HistoryAnalyzer(history)

453

454

# Identify failure patterns

455

patterns = analyzer.identify_failure_patterns()

456

if patterns:

457

print(f" Found {len(patterns)} failure patterns")

458

for pattern in patterns:

459

print(f" {pattern.pattern_type}: {pattern.frequency} occurrences")

460

461

# Get optimization suggestions

462

suggestions = analyzer.suggest_optimizations()

463

if suggestions:

464

print(f" {len(suggestions)} optimization suggestions:")

465

for suggestion in suggestions:

466

print(f" {suggestion.category}: {suggestion.description}")

467

```

468

469

### Performance Comparison

470

471

```python

472

from browser_use import Agent, HistoryAnalyzer

473

474

# Baseline execution

475

baseline_agent = Agent(task="Search task", use_thinking=True)

476

baseline_history = baseline_agent.run_sync()

477

478

# Optimized execution

479

optimized_agent = Agent(task="Search task", use_thinking=False, flash_mode=True)

480

optimized_history = optimized_agent.run_sync()

481

482

# Compare performance

483

analyzer = HistoryAnalyzer(optimized_history)

484

comparison = analyzer.compare_with_baseline(baseline_history)

485

486

print(f"Performance change: {comparison.performance_change:+.1%}")

487

print(f"Speed change: {comparison.speed_change:+.1%}")

488

print(f"Reliability change: {comparison.reliability_change:+.1%}")

489

print(f"Overall assessment: {comparison.overall_assessment}")

490

491

if comparison.new_issues:

492

print(f"New issues: {comparison.new_issues}")

493

if comparison.resolved_issues:

494

print(f"Resolved issues: {comparison.resolved_issues}")

495

```

496

497

### Execution Replay

498

499

```python

500

from browser_use import Agent, AgentHistoryList, HistoryAnalyzer

501

502

# Load successful execution

503

history = AgentHistoryList.load_from_file("successful_execution.json")

504

505

# Generate replay script

506

analyzer = HistoryAnalyzer(history)

507

replay_script = analyzer.generate_replay_script()

508

509

# Save replay script

510

with open("replay_execution.py", "w") as f:

511

f.write(replay_script)

512

513

print("Replay script generated: replay_execution.py")

514

515

# Or use built-in replay functionality

516

agent = Agent(task="Replay previous task")

517

replayed_results = await agent.load_and_rerun("successful_execution.json")

518

519

print(f"Replayed {len(replayed_results)} actions")

520

for i, result in enumerate(replayed_results):

521

print(f" Step {i+1}: {'✓' if result.success else '✗'}")

522

```

523

524

### Custom Result Processing

525

526

```python

527

from browser_use import ActionResult

528

529

# Create custom action result

530

result = ActionResult(

531

success=True,

532

extracted_content="Important data extracted",

533

attachments=["report.pdf", "data.csv"],

534

long_term_memory="Remember this site requires special login",

535

metadata={

536

"extraction_method": "css_selector",

537

"data_quality": "high",

538

"processing_time": 2.5

539

}

540

)

541

542

# Process result

543

print(f"Success: {result.success}")

544

print(f"Content: {result.extracted_content}")

545

print(f"Attachments: {result.attachments}")

546

print(f"Memory: {result.long_term_memory}")

547

548

# Convert to/from dictionary for storage

549

result_dict = result.to_dict()

550

reconstructed_result = ActionResult.from_dict(result_dict)

551

```

552

553

## Type Definitions

554

555

```python { .api }

556

from datetime import datetime

557

from pathlib import Path

558

from typing import Optional, Any

559

560

# Type alias for backward compatibility

561

AgentHistory = AgentHistoryList

562

```