or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agent-orchestration.md, browser-actions.md, browser-session.md, dom-processing.md, index.md, llm-integration.md, task-results.md

docs/dom-processing.md

0

# DOM Processing

1

2

Advanced DOM extraction, serialization, element indexing, and interaction capabilities for intelligent web page understanding. The DomService provides sophisticated DOM analysis and manipulation features that enable AI agents to understand and interact with web pages effectively.

3

4

## Capabilities

5

6

### DOM Service Core

7

8

Central service for DOM tree extraction and manipulation with support for cross-origin content and intelligent element indexing.

9

10

```python { .api }

11

class DomService:
    """Central service for DOM tree extraction, element lookup, and serialization."""

    def __init__(
        self,
        browser_session: BrowserSession,
        logger: logging.Logger | None = None,
        cross_origin_iframes: bool = False,
        include_attributes: list[str] | None = None
    ):
        """
        Initialize DOM processing service.

        Parameters:
        - browser_session: Browser session for DOM access
        - logger: Optional custom logger instance
        - cross_origin_iframes: Include cross-origin iframe content
        - include_attributes: DOM attributes to include in serialization
        """

    async def get_dom_tree(self) -> DomTree:
        """
        Extract complete DOM tree from current page.

        Returns:
        DomTree: Structured representation of page DOM
        """

    async def get_clickable_elements(self) -> list[ElementInfo]:
        """
        Extract all clickable/interactable elements from page.

        Returns:
        list[ElementInfo]: List of elements that can be interacted with
        """

    async def serialize_dom(
        self,
        include_text: bool = True,
        include_attributes: bool = True,
        max_depth: int | None = None
    ) -> str:
        """
        Serialize DOM tree to text representation.

        Parameters:
        - include_text: Include text content of elements
        - include_attributes: Include element attributes (a flag here, unlike
          the attribute-name list accepted by the constructor)
        - max_depth: Maximum tree depth to serialize

        Returns:
        str: Text representation of DOM structure
        """

    async def find_elements_by_text(self, text: str) -> list[ElementInfo]:
        """
        Find elements containing specific text.

        Parameters:
        - text: Text to search for in elements

        Returns:
        list[ElementInfo]: Elements containing the text
        """

    async def find_elements_by_selector(self, selector: str) -> list[ElementInfo]:
        """
        Find elements using CSS selector.

        Parameters:
        - selector: CSS selector string

        Returns:
        list[ElementInfo]: Elements matching the selector
        """

    async def get_element_screenshot(self, index: int) -> str:
        """
        Take screenshot of specific element.

        Parameters:
        - index: Element index

        Returns:
        str: Path to element screenshot image
        """

95

```

96

97

### DOM Tree Structure

98

99

Hierarchical representation of web page DOM structure with element relationships and metadata.

100

101

```python { .api }

102

class DomTree:
    """Complete DOM tree representation."""

    # Root node of the tree
    root: DomNode
    # Element counts for the page, broken down by category
    total_elements: int
    clickable_elements: int
    form_elements: int
    interactive_elements: int

109

110

class DomNode:
    """Individual DOM node representation."""

    tag: str
    text: str
    attributes: dict[str, str]
    index: int
    children: list[DomNode]
    # The tree root has no parent, so the field must admit None
    parent: DomNode | None
    bounding_box: BoundingBox
    is_clickable: bool
    is_visible: bool
    # Locators for the node
    xpath: str
    css_selector: str

123

124

class BoundingBox:
    """Element positioning and dimensions."""

    # Origin and size
    x: float
    y: float
    width: float
    height: float
    # Edge coordinates
    top: float
    left: float
    bottom: float
    right: float

134

```

135

136

### Element Information

137

138

Detailed information about individual DOM elements for interaction and analysis.

139

140

```python { .api }

141

class ElementInfo:
    """Comprehensive element information."""

    index: int
    tag: str
    text: str
    attributes: dict[str, str]
    bounding_box: BoundingBox
    is_clickable: bool
    is_visible: bool
    is_enabled: bool
    element_type: str  # 'button', 'input', 'link', 'text', etc.
    # Locators for the element
    xpath: str
    css_selector: str
    # Relationships expressed as element indices rather than object references
    parent_index: int
    children_indices: list[int]

156

157

class FormElementInfo(ElementInfo):
    """Form-specific element information."""

    input_type: str  # 'text', 'password', 'email', 'checkbox', etc.
    is_required: bool
    placeholder: str
    value: str
    # Validation constraints, all carried as strings
    min_value: str
    max_value: str
    pattern: str

166

167

class SelectElementInfo(ElementInfo):
    """Select/dropdown element information."""

    options: list[SelectOption]
    selected_value: str
    multiple: bool

172

173

class SelectOption:
    """Option within select element."""

    value: str
    text: str
    selected: bool
    disabled: bool

179

```

180

181

### Element Interaction Analysis

182

183

Advanced analysis of element interactability and interaction patterns.

184

185

```python { .api }

186

class InteractionAnalyzer:
    """Analyze element interaction possibilities."""

    async def analyze_clickability(self, element: ElementInfo) -> ClickabilityAnalysis:
        """
        Analyze how clickable an element is.

        Parameters:
        - element: Element to analyze

        Returns:
        ClickabilityAnalysis: Detailed clickability assessment
        """

    async def analyze_form_structure(self, form_index: int) -> FormAnalysis:
        """
        Analyze form structure and required fields.

        Parameters:
        - form_index: Index of form element

        Returns:
        FormAnalysis: Complete form structure analysis
        """

    async def suggest_interaction_strategy(
        self,
        target_goal: str
    ) -> InteractionStrategy:
        """
        Suggest best interaction strategy for achieving goal.

        Parameters:
        - target_goal: Description of desired outcome

        Returns:
        InteractionStrategy: Recommended interaction sequence
        """

224

225

class ClickabilityAnalysis:
    """Analysis of element clickability."""

    is_clickable: bool
    confidence: float  # 0.0-1.0
    blocking_elements: list[ElementInfo]
    alternative_elements: list[ElementInfo]
    # Unpacked as (x, y) by callers
    click_coordinates: tuple[float, float]

232

233

class FormAnalysis:
    """Complete form structure analysis."""

    form_element: ElementInfo
    required_fields: list[FormElementInfo]
    optional_fields: list[FormElementInfo]
    submit_buttons: list[ElementInfo]
    validation_rules: dict[str, str]

240

241

class InteractionStrategy:
    """Recommended interaction sequence."""

    steps: list[InteractionStep]
    confidence: float
    alternatives: list[InteractionStep]

246

247

class InteractionStep:
    """Individual interaction step."""

    action: str  # 'click', 'input', 'scroll', 'wait'
    element_index: int
    # Action-specific arguments keyed by name
    parameters: dict[str, Any]
    expected_outcome: str

253

```

254

255

### Content Extraction

256

257

Advanced content extraction capabilities for text, images, and structured data.

258

259

```python { .api }

260

class ContentExtractor:
    """Extract various types of content from pages."""

    async def extract_text_content(
        self,
        clean: bool = True,
        include_hidden: bool = False
    ) -> str:
        """
        Extract text content from page.

        Parameters:
        - clean: Clean and normalize text
        - include_hidden: Include hidden element text

        Returns:
        str: Extracted text content
        """

    async def extract_links(
        self,
        internal_only: bool = False,
        include_anchors: bool = True
    ) -> list[LinkInfo]:
        """
        Extract all links from page.

        Parameters:
        - internal_only: Only include internal links
        - include_anchors: Include anchor links

        Returns:
        list[LinkInfo]: All links found on page
        """

    async def extract_images(
        self,
        include_data_urls: bool = False,
        min_size: tuple[int, int] | None = None
    ) -> list[ImageInfo]:
        """
        Extract image information from page.

        Parameters:
        - include_data_urls: Include base64 data URLs
        - min_size: Minimum image dimensions (width, height)

        Returns:
        list[ImageInfo]: All images found on page
        """

    async def extract_tables(self) -> list[TableInfo]:
        """
        Extract structured table data.

        Returns:
        list[TableInfo]: All tables with structured data
        """

318

319

class LinkInfo:
    """Link element information."""

    url: str
    text: str
    title: str
    element_index: int
    is_external: bool
    is_anchor: bool

327

328

class ImageInfo:
    """Image element information."""

    src: str
    alt: str
    title: str
    width: int
    height: int
    element_index: int
    is_data_url: bool

337

338

class TableInfo:
    """Table structure and data."""

    headers: list[str]
    # One inner list of cell strings per table row
    rows: list[list[str]]
    element_index: int
    caption: str

344

```

345

346

## Usage Examples

347

348

### Basic DOM Analysis

349

350

```python

351

import asyncio

from browser_use import BrowserSession, DomService


async def main():
    """Print element counts and list the clickable elements of a page."""
    session = BrowserSession()
    dom_service = DomService(session)

    # Navigate to page
    await session.navigate_to_url("https://example.com")

    # Get complete DOM tree
    dom_tree = await dom_service.get_dom_tree()
    print(f"Total elements: {dom_tree.total_elements}")
    print(f"Clickable elements: {dom_tree.clickable_elements}")

    # Get clickable elements
    clickable = await dom_service.get_clickable_elements()
    for element in clickable:
        print(f"Index {element.index}: {element.tag} - {element.text}")


# `await` is only valid inside a coroutine, so the example runs through asyncio.run
if __name__ == "__main__":
    asyncio.run(main())

368

```

369

370

### Element Search and Interaction

371

372

```python

373

import asyncio

from browser_use import DomService, BrowserSession


async def main():
    """Locate elements by text and CSS selector, then screenshot the first match."""
    session = BrowserSession()
    dom_service = DomService(session)

    await session.navigate_to_url("https://example.com/search")

    # Find search box by text
    search_elements = await dom_service.find_elements_by_text("Search")
    if search_elements:
        search_box = search_elements[0]
        print(f"Found search box at index: {search_box.index}")

    # Find elements by CSS selector
    buttons = await dom_service.find_elements_by_selector("button.primary")
    for button in buttons:
        print(f"Button {button.index}: {button.text}")

    # Take screenshot of specific element
    if buttons:
        screenshot_path = await dom_service.get_element_screenshot(buttons[0].index)
        print(f"Button screenshot saved: {screenshot_path}")


# `await` is only valid inside a coroutine, so the example runs through asyncio.run
if __name__ == "__main__":
    asyncio.run(main())

395

```

396

397

### Advanced DOM Configuration

398

399

```python

400

import asyncio

from browser_use import DomService, BrowserSession


async def main():
    """Serialize a page's DOM with a custom attribute list and a depth limit."""
    session = BrowserSession()

    # Configure DOM service with custom attributes
    dom_service = DomService(
        browser_session=session,
        cross_origin_iframes=True,  # Include iframe content
        include_attributes=[
            'id', 'class', 'name', 'data-testid',
            'aria-label', 'placeholder', 'href', 'src'
        ]
    )

    await session.navigate_to_url("https://complex-site.com")

    # Serialize DOM with custom options
    dom_text = await dom_service.serialize_dom(
        include_text=True,
        include_attributes=True,
        max_depth=5  # Limit depth for large pages
    )

    print("DOM Structure:")
    print(dom_text[:1000])  # First 1000 characters


# `await` is only valid inside a coroutine, so the example runs through asyncio.run
if __name__ == "__main__":
    asyncio.run(main())

425

```

426

427

### Form Analysis Workflow

428

429

```python

430

import asyncio

# InteractionAnalyzer is used below, so it must be imported alongside the others
from browser_use import DomService, BrowserSession, InteractionAnalyzer


async def main():
    """Report the required fields and submit buttons of every form on a page."""
    session = BrowserSession()
    dom_service = DomService(session)

    await session.navigate_to_url("https://example.com/contact")

    # Find all form elements
    forms = await dom_service.find_elements_by_selector("form")

    # One analyzer serves every form; no need to rebuild it per iteration
    analyzer = InteractionAnalyzer()

    for form in forms:
        print(f"Form {form.index}:")

        # Analyze form structure
        form_analysis = await analyzer.analyze_form_structure(form.index)

        print(f" Required fields: {len(form_analysis.required_fields)}")
        for field in form_analysis.required_fields:
            print(f" {field.tag}[{field.input_type}]: {field.placeholder}")

        print(f" Submit buttons: {len(form_analysis.submit_buttons)}")
        for button in form_analysis.submit_buttons:
            print(f" {button.text}")


# `await` is only valid inside a coroutine, so the example runs through asyncio.run
if __name__ == "__main__":
    asyncio.run(main())

454

```

455

456

### Content Extraction Workflow

457

458

```python

459

import asyncio

from browser_use import DomService, BrowserSession, ContentExtractor


async def main():
    """Extract text, links, images, and tables from an article page."""
    session = BrowserSession()
    dom_service = DomService(session)
    extractor = ContentExtractor()

    await session.navigate_to_url("https://news-site.com/article")

    # Extract page text content
    text_content = await extractor.extract_text_content(clean=True)
    print(f"Article text ({len(text_content)} chars):")
    print(text_content[:500])

    # Extract all links
    links = await extractor.extract_links(internal_only=False)
    print(f"\nFound {len(links)} links:")
    for link in links[:5]:  # First 5 links
        print(f" {link.text}: {link.url}")

    # Extract images
    images = await extractor.extract_images(min_size=(100, 100))
    print(f"\nFound {len(images)} images:")
    for image in images[:3]:  # First 3 images
        print(f" {image.alt}: {image.src}")

    # Extract tables if any
    tables = await extractor.extract_tables()
    if tables:
        print(f"\nFound {len(tables)} tables:")
        for i, table in enumerate(tables):
            print(f" Table {i}: {len(table.headers)} columns, {len(table.rows)} rows")


# `await` is only valid inside a coroutine, so the example runs through asyncio.run
if __name__ == "__main__":
    asyncio.run(main())

490

```

491

492

### Interaction Strategy Planning

493

494

```python

495

import asyncio

from browser_use import DomService, BrowserSession, InteractionAnalyzer


async def main():
    """Ask for an interaction strategy, print it, then execute its steps."""
    session = BrowserSession()
    dom_service = DomService(session)
    analyzer = InteractionAnalyzer()

    await session.navigate_to_url("https://ecommerce-site.com/product")

    # Plan interaction strategy for adding item to cart
    strategy = await analyzer.suggest_interaction_strategy(
        "Add this product to shopping cart"
    )

    print(f"Interaction strategy (confidence: {strategy.confidence}):")
    # Steps are reported 1-based
    for i, step in enumerate(strategy.steps, start=1):
        print(f" Step {i}: {step.action} on element {step.element_index}")
        print(f" Expected: {step.expected_outcome}")

    # Execute strategy; actions other than click/input/scroll are not handled here
    for step in strategy.steps:
        if step.action == "click":
            await session.click_element(step.element_index)
        elif step.action == "input":
            text = step.parameters.get("text", "")
            await session.input_text(step.element_index, text)
        elif step.action == "scroll":
            await session.scroll(step.parameters.get("down", True), 1)


# `await` is only valid inside a coroutine, so the example runs through asyncio.run
if __name__ == "__main__":
    asyncio.run(main())

522

```

523

524

### Element Clickability Analysis

525

526

```python

527

import asyncio

from browser_use import DomService, BrowserSession, InteractionAnalyzer


async def main():
    """Analyze and report the clickability of every element matching a text."""
    session = BrowserSession()
    dom_service = DomService(session)
    analyzer = InteractionAnalyzer()

    await session.navigate_to_url("https://complex-ui.com")

    # Find potential target element
    target_elements = await dom_service.find_elements_by_text("Subscribe")

    for element in target_elements:
        # Analyze clickability
        analysis = await analyzer.analyze_clickability(element)

        print(f"Element {element.index} clickability:")
        print(f" Clickable: {analysis.is_clickable}")
        print(f" Confidence: {analysis.confidence}")

        if analysis.blocking_elements:
            print(f" Blocked by {len(analysis.blocking_elements)} elements")

        if analysis.alternative_elements:
            print(f" {len(analysis.alternative_elements)} alternatives available")

        if analysis.is_clickable:
            x, y = analysis.click_coordinates
            print(f" Best click point: ({x}, {y})")


# `await` is only valid inside a coroutine, so the example runs through asyncio.run
if __name__ == "__main__":
    asyncio.run(main())

555

```

556

557

## Configuration Constants

558

559

```python { .api }

560

# Default DOM attributes to include in serialization
DEFAULT_INCLUDE_ATTRIBUTES: list[str] = [
    'id', 'class', 'name', 'aria-label', 'data-testid',
    'placeholder', 'href', 'src', 'type', 'value'
]

# Element interaction priorities, keyed by CSS selector
CLICKABLE_ELEMENT_PRIORITIES: dict[str, int] = {
    'button': 10,
    'a': 9,
    'input[type="submit"]': 8,
    'input[type="button"]': 7,
    'select': 6,
    'input': 5
}

# Maximum DOM serialization limits
MAX_DOM_TEXT_LENGTH: int = 50000
MAX_ELEMENT_DEPTH: int = 20
MAX_CLICKABLE_ELEMENTS: int = 100

580

```