or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agent-orchestration.md browser-actions.md browser-session.md dom-processing.md index.md llm-integration.md task-results.md

docs/browser-actions.md

0

# Browser Actions and Tools

1

2

Extensible action system with built-in browser automation capabilities. The Tools class provides a registry of actions that agents can execute, including navigation, element interaction, form handling, and custom action registration.

3

4

## Capabilities

5

6

### Tools Registry and Execution

7

8

Core action registry and execution engine for browser automation actions.

9

10

```python { .api }

11

class Tools:
    """Core action registry and execution engine for browser automation actions."""

    def __init__(
        self,
        exclude_actions: list[str] | None = None,
        output_model: type | None = None,
        display_files_in_done_text: bool = True,
    ):
        """
        Create tools registry for browser actions.

        Parameters:
        - exclude_actions: List of action names to exclude from registry;
          may be omitted (defaults to None)
        - output_model: Type for structured output formatting; may be
          omitted (defaults to None)
        - display_files_in_done_text: Show files in completion messages
        """

    async def act(
        self,
        action: ActionModel,
        browser_session: BrowserSession,
        controller: Any = None,
    ) -> ActionResult:
        """
        Execute a browser action.

        Parameters:
        - action: Action model with parameters
        - browser_session: Browser session to execute action on
        - controller: Optional controller context

        Returns:
        ActionResult: Execution result with success/failure status
        """

    @property
    def registry(self) -> ActionRegistry:
        """Access to action registry for custom action registration."""

48

```

49

50

### Custom Action Registration

51

52

System for registering custom browser actions with the tools registry.

53

54

```python { .api }

55

# Decorator for registering custom actions
def action(description: str, param_model: type[BaseModel] | None = None):
    """
    Decorator to register custom browser actions.

    Parameters:
    - description: Description of what the action does
    - param_model: Pydantic model for action parameters; may be omitted
      (defaults to None)

    Usage:
        @tools.registry.action("Custom action description")
        async def custom_action(param: str) -> ActionResult:
            # Action implementation
            return ActionResult(success=True)
    """

70

```

71

72

### Built-in Navigation Actions

73

74

Core navigation and URL management actions.

75

76

```python { .api }

77

def search_google(query: str) -> ActionResult:
    """
    Run a Google search for the given query.

    Parameters:
    - query: The search query string

    Returns:
    ActionResult: Result of executing the search
    """
    ...

87

88

def go_to_url(url: str) -> ActionResult:
    """
    Point the browser at the given URL.

    Parameters:
    - url: The URL to navigate to

    Returns:
    ActionResult: Result of the navigation
    """
    ...

98

```

99

100

### Element Interaction Actions

101

102

Actions for interacting with DOM elements including clicking, text input, and form handling.

103

104

```python { .api }

105

def click_element(index: int) -> ActionResult:
    """
    Click the DOM element identified by an index.

    Parameters:
    - index: Index of the element, as given by the DOM serialization

    Returns:
    ActionResult: Result of executing the click
    """
    ...

115

116

def input_text(index: int, text: str) -> ActionResult:
    """
    Enter text into a form element.

    Parameters:
    - index: Index of the input field element
    - text: The text to put into that field

    Returns:
    ActionResult: Result of the text input
    """
    ...

127

128

def send_keys(keys: str) -> ActionResult:
    """
    Send keyboard input to the browser.

    Parameters:
    - keys: The key combination to send (e.g., "Ctrl+C", "Enter", "Tab")

    Returns:
    ActionResult: Result of sending the keys
    """
    ...

138

139

def upload_file(index: int, file_path: str) -> ActionResult:
    """
    Upload a file through a file input element.

    Parameters:
    - index: Index of the file input element
    - file_path: Path of the file to upload

    Returns:
    ActionResult: Result of the file upload
    """
    ...

150

```

151

152

### Page Navigation Actions

153

154

Actions for page scrolling and viewport management.

155

156

```python { .api }

157

def scroll(down: bool, num_pages: float) -> ActionResult:
    """
    Scroll the page in either direction.

    Parameters:
    - down: True scrolls down, False scrolls up
    - num_pages: How many pages to scroll; fractional values are allowed

    Returns:
    ActionResult: Result of executing the scroll
    """
    ...

168

```

169

170

### Tab Management Actions

171

172

Actions for managing browser tabs and windows.

173

174

```python { .api }

175

def switch_tab(tab_id: str) -> ActionResult:
    """
    Switch to another browser tab.

    Parameters:
    - tab_id: Identifier of the tab to switch to

    Returns:
    ActionResult: Result of the tab switch
    """
    ...

185

186

def close_tab(tab_id: str) -> ActionResult:
    """
    Close a browser tab.

    Parameters:
    - tab_id: Identifier of the tab that should be closed

    Returns:
    ActionResult: Result of closing the tab
    """
    ...

196

```

197

198

### Form and Dropdown Actions

199

200

Specialized actions for form element interaction and dropdown handling.

201

202

```python { .api }

203

def get_dropdown_options(index: int) -> ActionResult:
    """
    List the options a dropdown element offers.

    Parameters:
    - index: Index of the dropdown/select element

    Returns:
    ActionResult: Dropdown options; extracted_content contains the option list
    """
    ...

213

214

def select_dropdown_option(index: int, option_value: str) -> ActionResult:
    """
    Choose an option in a dropdown element.

    Parameters:
    - index: Index of the dropdown/select element
    - option_value: Value of the option that should be selected

    Returns:
    ActionResult: Result of the option selection
    """
    ...

225

```

226

227

### Task Completion Actions

228

229

Actions for marking tasks as complete and providing results.

230

231

```python { .api }

232

def done(text: str, files: list[str] | None = None) -> ActionResult:
    """
    Mark task as completed with result text.

    Parameters:
    - text: Completion message or result description
    - files: Optional list of file paths to attach to result; may be
      omitted (defaults to None)

    Returns:
    ActionResult: Task completion result with is_done=True
    """

243

```

244

245

### Action Parameter Models

246

247

Pydantic models for structured action parameters and validation.

248

249

```python { .api }

250

class SearchGoogleAction(BaseModel):
    """Parameter model for the Google search action."""

    # Search query string
    query: str

253

254

class GoToUrlAction(BaseModel):
    """Parameter model for the URL navigation action."""

    # Target URL to navigate to
    url: str

257

258

class ClickElementAction(BaseModel):
    """Parameter model for the element clicking action."""

    # Element index from DOM serialization
    index: int

261

262

class InputTextAction(BaseModel):
    """Parameter model for the text input action."""

    # Element index of the input field
    index: int
    # Text to input into the field
    text: str

266

267

class ScrollAction(BaseModel):
    """Parameter model for the page scrolling action."""

    # True to scroll down, False to scroll up
    down: bool
    # Number of pages to scroll (can be fractional)
    num_pages: float

271

272

class SwitchTabAction(BaseModel):
    """Parameter model for the tab switching action."""

    # Identifier of the target tab
    tab_id: str

275

276

class CloseTabAction(BaseModel):
    """Parameter model for the tab closing action."""

    # Identifier of the tab to close
    tab_id: str

279

280

class SendKeysAction(BaseModel):
    """Parameter model for the keyboard input action."""

    # Key combination to send (e.g., "Ctrl+C", "Enter", "Tab")
    keys: str

283

284

class UploadFileAction(BaseModel):
    """Parameter model for the file upload action."""

    # Element index of the file input
    index: int
    # Path to the file to upload
    file_path: str

288

289

class GetDropdownOptionsAction(BaseModel):
    """Parameter model for the dropdown inspection action."""

    # Element index of the dropdown/select element
    index: int

292

293

class SelectDropdownOptionAction(BaseModel):
    """Parameter model for the dropdown selection action."""

    # Element index of the dropdown/select element
    index: int
    # Value of the option to select
    option_value: str

297

298

class DoneAction(BaseModel):
    """Parameters for task completion action."""

    # Completion message or result description
    text: str
    # Optional list of file paths to attach to the result. The annotation
    # includes None so that it agrees with the None default.
    files: list[str] | None = None

302

```

303

304

### Action Model Base

305

306

Base class for all action models with common functionality.

307

308

```python { .api }

309

class ActionModel(BaseModel):
    """Common base model shared by browser actions."""

    def get_index(self) -> int | None:
        """
        Read the element index out of the action parameters.

        Returns:
        int | None: The element index when the action targets a specific element
        """
        ...

    def set_index(self, index: int) -> None:
        """
        Assign the element index for the action.

        Parameters:
        - index: The element index to set
        """
        ...

327

```

328

329

## Usage Examples

330

331

### Basic Action Execution

332

333

```python

334

import asyncio

from browser_use import Tools, BrowserSession

# NOTE(review): GoToUrlAction is used below but is not imported anywhere in
# this snippet -- confirm its import path and add the import.


async def main() -> None:
    """Navigate to example.com and report whether the action succeeded."""
    tools = Tools()
    session = BrowserSession()

    # Execute navigation action
    result = await tools.act(
        action=GoToUrlAction(url="https://example.com"),
        browser_session=session,
    )

    if result.success:
        print("Navigation successful")
    else:
        print(f"Navigation failed: {result.error}")


# `await` is only valid inside a coroutine, so the example is driven by
# asyncio.run() instead of awaiting at module level.
if __name__ == "__main__":
    asyncio.run(main())

349

```

350

351

### Custom Tools Configuration

352

353

```python

354

from browser_use import Tools

# Build a registry that leaves out two of the built-in actions
tools = Tools(
    display_files_in_done_text=False,
    exclude_actions=["search_google", "upload_file"],
)

# Still available: go_to_url, click_element, input_text, etc.
# No longer available: search_google, upload_file

364

```

365

366

### Custom Action Registration

367

368

```python

369

import asyncio

from browser_use import Tools, ActionResult, BrowserSession
from pydantic import BaseModel


class CustomActionParams(BaseModel):
    """Parameters accepted by the custom action registered below."""

    target: str
    options: dict = {}


tools = Tools()


@tools.registry.action("Perform custom browser operation", CustomActionParams)
async def custom_browser_action(target: str, options: dict | None = None) -> ActionResult:
    """Custom action implementation."""
    # A None default avoids sharing one mutable dict between calls.
    options = {} if options is None else options
    try:
        # Perform custom browser operation
        result = f"Custom action performed on {target}"
        return ActionResult(
            success=True,
            extracted_content=result,
        )
    except Exception as e:
        return ActionResult(
            success=False,
            error=str(e),
        )


async def main() -> None:
    """Run the registered custom action against a fresh browser session."""
    # The session is created here; the original snippet used `session`
    # without defining it.
    session = BrowserSession()

    # Use custom action
    # NOTE(review): act() is documented as taking an ActionModel -- confirm
    # that passing the parameter model directly is accepted.
    result = await tools.act(
        action=CustomActionParams(target="special-element", options={"mode": "test"}),
        browser_session=session,
    )


# `await` is only valid inside a coroutine, so the example is driven by
# asyncio.run() instead of awaiting at module level.
if __name__ == "__main__":
    asyncio.run(main())

399

```

400

401

### Form Interaction Workflow

402

403

```python

404

import asyncio

from browser_use import Tools, BrowserSession

# NOTE(review): GoToUrlAction, InputTextAction, GetDropdownOptionsAction,
# SelectDropdownOptionAction, ClickElementAction and DoneAction are used below
# but are not imported in this snippet -- confirm their import path.


async def main() -> None:
    """Fill in and submit a form, then mark the task as complete."""
    tools = Tools()
    session = BrowserSession()

    # Navigate to form page
    await tools.act(GoToUrlAction(url="https://example.com/form"), session)

    # Fill form fields
    await tools.act(InputTextAction(index=1, text="John Doe"), session)
    await tools.act(InputTextAction(index=2, text="john@example.com"), session)

    # Handle dropdown
    dropdown_result = await tools.act(GetDropdownOptionsAction(index=3), session)
    print(f"Available options: {dropdown_result.extracted_content}")

    await tools.act(SelectDropdownOptionAction(index=3, option_value="option1"), session)

    # Submit form
    await tools.act(ClickElementAction(index=4), session)

    # Mark task complete
    await tools.act(DoneAction(text="Form submitted successfully"), session)


# `await` is only valid inside a coroutine, so the example is driven by
# asyncio.run() instead of awaiting at module level.
if __name__ == "__main__":
    asyncio.run(main())

427

```

428

429

### File Upload Workflow

430

431

```python

432

import asyncio

from browser_use import Tools, BrowserSession

# NOTE(review): GoToUrlAction, UploadFileAction, ClickElementAction and
# DoneAction are used below but are not imported in this snippet -- confirm
# their import path.


async def main() -> None:
    """Upload a file and, if the upload succeeds, submit and finish the task."""
    tools = Tools()
    session = BrowserSession()

    # Navigate to upload page
    await tools.act(GoToUrlAction(url="https://example.com/upload"), session)

    # Upload file
    result = await tools.act(
        UploadFileAction(index=2, file_path="/path/to/document.pdf"),
        session,
    )

    if result.success:
        # Continue with form if needed
        await tools.act(ClickElementAction(index=3), session)  # Submit button
        await tools.act(DoneAction(text="File uploaded successfully"), session)


# `await` is only valid inside a coroutine, so the example is driven by
# asyncio.run() instead of awaiting at module level.
if __name__ == "__main__":
    asyncio.run(main())

450

```

451

452

### Keyboard Shortcuts

453

454

```python

455

import asyncio

from browser_use import Tools, BrowserSession

# NOTE(review): GoToUrlAction, SendKeysAction and InputTextAction are used
# below but are not imported in this snippet -- confirm their import path.


async def main() -> None:
    """Drive the browser's find feature using keyboard shortcuts."""
    tools = Tools()
    session = BrowserSession()

    # Navigate to page
    await tools.act(GoToUrlAction(url="https://example.com"), session)

    # Use keyboard shortcuts
    await tools.act(SendKeysAction(keys="Ctrl+F"), session)  # Open find
    await tools.act(InputTextAction(index=1, text="search term"), session)
    await tools.act(SendKeysAction(keys="Enter"), session)  # Search
    await tools.act(SendKeysAction(keys="Escape"), session)  # Close find


# `await` is only valid inside a coroutine, so the example is driven by
# asyncio.run() instead of awaiting at module level.
if __name__ == "__main__":
    asyncio.run(main())

468

```

469

470

## Type Definitions

471

472

```python { .api }

473

from typing import Any, Optional

474

from pydantic import BaseModel

475

476

class ActionRegistry:
    """Registry for browser actions."""

    def action(self, description: str, param_model: type[BaseModel] | None = None):
        """
        Decorator to register custom browser actions.

        Parameters:
        - description: Description of what the action does
        - param_model: Pydantic model for action parameters; may be omitted
          (defaults to None)
        """
        ...

479

480

# Controller is bound to the same class object as Tools, so both names can be
# used interchangeably.
Controller = Tools # Type alias for backward compatibility

481

```