or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

browser-automation.md cookies.md forms.md html-dom.md http.md index.md javascript.md windows.md

docs/html-dom.md

0

# HTML DOM Manipulation

1

2

HtmlUnit provides comprehensive HTML DOM access and manipulation capabilities, including element lookup, CSS selectors, XPath queries, and DOM tree navigation. These capabilities are essential for web scraping and automated testing.

3

4

## Capabilities

5

6

### HtmlPage Class

7

8

Primary interface for HTML page manipulation and content extraction.

9

10

```java { .api }

11

/**

12

* HTML page with full DOM manipulation support

13

*/

14

public class HtmlPage extends SgmlPage {

15

/** Get element by ID attribute */

16

public HtmlElement getElementById(String id);

17

18

/** Get elements by tag name */

19

public List<HtmlElement> getElementsByTagName(String tagName);

20

21

/** Get elements by name attribute */

22

public List<HtmlElement> getElementsByName(String name);

23

24

/** Get elements by CSS class name */

25

public List<HtmlElement> getElementsByClassName(String className);

26

27

/** Get first element matching CSS selector */

28

public HtmlElement querySelector(String selectors);

29

30

/** Get all elements matching CSS selector */

31

public List<HtmlElement> querySelectorAll(String selectors);

32

33

/** Get first node matching XPath expression */

34

public DomNode getFirstByXPath(String xpathExpr);

35

36

/** Get all nodes matching XPath expression */

37

public List<?> getByXPath(String xpathExpr);

38

39

/** Get element by name (first match) */

40

public HtmlElement getElementByName(String name);

41

42

/** Get typed element by ID */

43

public <T extends HtmlElement> T getHtmlElementById(String id);

44

45

/** Get all forms on the page */

46

public List<HtmlForm> getForms();

47

48

/** Get form by name attribute */

49

public HtmlForm getFormByName(String name);

50

51

/** Get all anchor elements */

52

public List<HtmlAnchor> getAnchors();

53

54

/** Get anchor by name attribute */

55

public HtmlAnchor getAnchorByName(String name);

56

57

/** Get anchor by href attribute */

58

public HtmlAnchor getAnchorByHref(String href);

59

60

/** Get page title text */

61

public String getTitleText();

62

63

/** Get page content as plain text */

64

public String asText();

65

66

/** Get page content as XML string */

67

public String asXml();

68

69

/** Get head element */

70

public HtmlHead getHead();

71

72

/** Get body element */

73

public HtmlBody getBody();

74

75

/** Get document element (html) */

76

public HtmlHtml getDocumentElement();

77

78

/** Execute JavaScript code in page context */

79

public ScriptResult executeJavaScript(String sourceCode);

80

81

/** Execute JavaScript with source location info */

82

public ScriptResult executeJavaScript(String sourceCode, String sourceName, int startLine);

83

84

/** Click anywhere on the page */

85

public <P extends Page> P click() throws IOException;

86

87

/** Get character set encoding */

88

public Charset getCharset();

89

90

/** Refresh the page */

91

public void refresh() throws IOException;

92

93

/** Check whether the page supports the given feature */

94

public boolean hasFeature(PageFeature feature);

95

}

96

```

97

98

**Usage Examples:**

99

100

```java

101

import com.gargoylesoftware.htmlunit.WebClient;

102

import com.gargoylesoftware.htmlunit.html.HtmlPage;

103

import com.gargoylesoftware.htmlunit.html.HtmlElement;

104

105

try (WebClient webClient = new WebClient()) {

106

HtmlPage page = webClient.getPage("https://example.com");

107

108

// Element lookup by ID

109

HtmlElement loginButton = page.getElementById("login-btn");

110

if (loginButton != null) {

111

loginButton.click();

112

}

113

114

// CSS selector queries

115

HtmlElement navbar = page.querySelector(".navbar");

116

List<HtmlElement> menuItems = page.querySelectorAll(".menu-item");

117

118

// XPath queries

119

List<?> allLinks = page.getByXPath("//a[@href]");

120

HtmlElement firstParagraph = (HtmlElement) page.getFirstByXPath("//p[1]");

121

122

// Content extraction

123

String pageTitle = page.getTitleText();

124

String pageText = page.asText();

125

String pageHtml = page.asXml();

126

127

System.out.println("Title: " + pageTitle);

128

System.out.println("Found " + menuItems.size() + " menu items");

129

}

130

```

131

132

### DomNode Class

133

134

Base class for all DOM nodes providing tree navigation and manipulation.

135

136

```java { .api }

137

/**

138

* Base class for all DOM nodes

139

*/

140

public abstract class DomNode implements Cloneable {

141

/** Get parent node */

142

public DomNode getParentNode();

143

144

/** Get all child nodes */

145

public DomNodeList<DomNode> getChildNodes();

146

147

/** Get first child node */

148

public DomNode getFirstChild();

149

150

/** Get last child node */

151

public DomNode getLastChild();

152

153

/** Get next sibling node */

154

public DomNode getNextSibling();

155

156

/** Get previous sibling node */

157

public DomNode getPreviousSibling();

158

159

/** Get node name */

160

public String getNodeName();

161

162

/** Get node value */

163

public String getNodeValue();

164

165

/** Set node value */

166

public void setNodeValue(String nodeValue);

167

168

/** Get DOM node type constant */

169

public short getNodeType();

170

171

/** Append child node */

172

public DomNode appendChild(DomNode node);

173

174

/** Insert child before reference node */

175

public DomNode insertBefore(DomNode newChild, DomNode refChild);

176

177

/** Replace child node */

178

public DomNode replaceChild(DomNode newChild, DomNode oldChild);

179

180

/** Remove child node */

181

public DomNode removeChild(DomNode child);

182

183

/** Remove this node from parent */

184

public void remove();

185

186

/** Clone node (deep or shallow) */

187

public DomNode cloneNode(boolean deep);

188

189

/** Check if node has child nodes */

190

public boolean hasChildNodes();

191

192

/** Get text content of node and descendants */

193

public String getTextContent();

194

195

/** Set text content */

196

public void setTextContent(String textContent);

197

198

/** Get node as text representation */

199

public String asText();

200

201

/** Get node as XML string */

202

public String asXml();

203

204

/** Get containing page */

205

public SgmlPage getPage();

206

207

/** Get containing document */

208

public DomDocumentFragment getOwnerDocument();

209

210

/** Get nodes matching XPath expression */

211

public List<?> getByXPath(String xpathExpr);

212

213

/** Get canonical XPath to this node */

214

public String getCanonicalXPath();

215

216

/** Check if node is ancestor of another node */

217

public boolean isAncestorOf(DomNode node);

218

219

/** Check if node is descendant of another node */

220

public boolean isDescendantOf(DomNode node);

221

}

222

```

223

224

### DomElement Class

225

226

Base class for HTML/XML elements with attribute management.

227

228

```java { .api }

229

/**

230

* Base class for HTML and XML elements

231

*/

232

public class DomElement extends DomNode {

233

/** Get element tag name */

234

public String getTagName();

235

236

/** Get attribute value */

237

public String getAttribute(String attributeName);

238

239

/** Get attribute value with namespace */

240

public String getAttributeNS(String namespaceURI, String localName);

241

242

/** Set attribute value */

243

public void setAttribute(String attributeName, String attributeValue);

244

245

/** Set attribute with namespace */

246

public void setAttributeNS(String namespaceURI, String qualifiedName, String attributeValue);

247

248

/** Remove attribute */

249

public void removeAttribute(String attributeName);

250

251

/** Remove attribute with namespace */

252

public void removeAttributeNS(String namespaceURI, String localName);

253

254

/** Check if attribute exists */

255

public boolean hasAttribute(String attributeName);

256

257

/** Check if namespaced attribute exists */

258

public boolean hasAttributeNS(String namespaceURI, String localName);

259

260

/** Get all attribute names */

261

public Iterable<String> getAttributeNames();

262

263

/** Get element ID */

264

public String getId();

265

266

/** Set element ID */

267

public void setId(String id);

268

269

/** Get CSS class names */

270

public String getClassName();

271

272

/** Set CSS class names */

273

public void setClassName(String className);

274

275

/** Add CSS class */

276

public void addClass(String className);

277

278

/** Remove CSS class */

279

public void removeClass(String className);

280

281

/** Check if has CSS class */

282

public boolean hasClass(String className);

283

284

/** Get elements by tag name (descendants) */

285

public DomNodeList<DomElement> getElementsByTagName(String name);

286

287

/** Get elements by tag name with namespace */

288

public DomNodeList<DomElement> getElementsByTagNameNS(String namespaceURI, String localName);

289

290

/** Click the element */

291

public <P extends Page> P click() throws IOException;

292

293

/** Double-click the element */

294

public <P extends Page> P dblClick() throws IOException;

295

296

/** Right-click the element */

297

public <P extends Page> P rightClick() throws IOException;

298

299

/** Focus the element */

300

public void focus();

301

302

/** Remove focus from element */

303

public void blur();

304

305

/** Check if element is displayed */

306

public boolean isDisplayed();

307

308

/** Get element's computed style */

309

public ComputedCSSStyleDeclaration getComputedStyle();

310

311

/** Get element's inline style */

312

public ElementCssStyleDeclaration getStyle();

313

314

/** Check if element matches CSS selector */

315

public boolean matches(String selector);

316

317

/** Get closest ancestor matching selector */

318

public DomElement closest(String selector);

319

}

320

```

321

322

**Usage Examples:**

323

324

```java

325

import com.gargoylesoftware.htmlunit.html.DomElement;

326

import com.gargoylesoftware.htmlunit.html.HtmlElement;

327

328

// Element attribute manipulation

329

HtmlElement element = page.getElementById("myElement");

330

element.setAttribute("data-value", "123");

331

String value = element.getAttribute("data-value");

332

boolean hasClass = element.hasAttribute("class");

333

334

// CSS class manipulation

335

element.addClass("active");

336

element.removeClass("disabled");

337

boolean isActive = element.hasClass("active");

338

339

// DOM tree navigation

340

DomNode parent = element.getParentNode();

341

DomNodeList<DomNode> children = element.getChildNodes();

342

DomNode nextSibling = element.getNextSibling();

343

344

// Element interaction

345

if (element.isDisplayed()) {

346

HtmlPage newPage = element.click();

347

}

348

```

349

350

### HtmlElement Class

351

352

Abstract base class for HTML elements. It extends `DomElement` with HTML-specific attribute accessors, keyboard input simulation, and layout/scroll metrics.

353

354

```java { .api }

355

/**

356

* Base class for HTML elements with interaction capabilities

357

*/

358

public abstract class HtmlElement extends DomElement {

359

/** Get element's lang attribute */

360

public String getLangAttribute();

361

362

/** Get element's dir attribute */

363

public String getDirAttribute();

364

365

/** Get element's title attribute */

366

public String getTitleAttribute();

367

368

/** Get element's tabindex */

369

public String getTabIndexAttribute();

370

371

/** Get element's onclick attribute */

372

public String getOnClickAttribute();

373

374

/** Get element's ondblclick attribute */

375

public String getOnDblClickAttribute();

376

377

/** Get element's onmousedown attribute */

378

public String getOnMouseDownAttribute();

379

380

/** Get element's onmouseup attribute */

381

public String getOnMouseUpAttribute();

382

383

/** Get element's onmouseover attribute */

384

public String getOnMouseOverAttribute();

385

386

/** Get element's onmousemove attribute */

387

public String getOnMouseMoveAttribute();

388

389

/** Get element's onmouseout attribute */

390

public String getOnMouseOutAttribute();

391

392

/** Get element's onkeypress attribute */

393

public String getOnKeyPressAttribute();

394

395

/** Get element's onkeydown attribute */

396

public String getOnKeyDownAttribute();

397

398

/** Get element's onkeyup attribute */

399

public String getOnKeyUpAttribute();

400

401

/** Type text into element (fires keyboard events) */

402

public void type(String text) throws IOException;

403

404

/** Type single character */

405

public void type(char c) throws IOException;

406

407

/** Simulate key press */

408

public void keyDown(int keyCode);

409

410

/** Simulate key release */

411

public void keyUp(int keyCode);

412

413

/** Get element's bounding rectangle */

414

public Rectangle getBoundingClientRect();

415

416

/** Scroll element into view */

417

public void scrollIntoView();

418

419

/** Check if element is enabled */

420

public boolean isEnabled();

421

422

/** Get element's offset parent */

423

public HtmlElement getOffsetParent();

424

425

/** Get offset dimensions */

426

public int getOffsetHeight();

427

public int getOffsetWidth();

428

public int getOffsetLeft();

429

public int getOffsetTop();

430

431

/** Get client dimensions */

432

public int getClientHeight();

433

public int getClientWidth();

434

public int getClientLeft();

435

public int getClientTop();

436

437

/** Get scroll dimensions */

438

public int getScrollHeight();

439

public int getScrollWidth();

440

public int getScrollLeft();

441

public int getScrollTop();

442

443

/** Set scroll position */

444

public void setScrollLeft(int scrollLeft);

445

public void setScrollTop(int scrollTop);

446

}

447

```

448

449

### DOM Node Types

450

451

```java { .api }

452

/**

453

* DOM node type constants (from DOM specification)

454

*/

455

public interface Node {

456

public static final short ELEMENT_NODE = 1;

457

public static final short ATTRIBUTE_NODE = 2;

458

public static final short TEXT_NODE = 3;

459

public static final short CDATA_SECTION_NODE = 4;

460

public static final short ENTITY_REFERENCE_NODE = 5;

461

public static final short ENTITY_NODE = 6;

462

public static final short PROCESSING_INSTRUCTION_NODE = 7;

463

public static final short COMMENT_NODE = 8;

464

public static final short DOCUMENT_NODE = 9;

465

public static final short DOCUMENT_TYPE_NODE = 10;

466

public static final short DOCUMENT_FRAGMENT_NODE = 11;

467

public static final short NOTATION_NODE = 12;

468

}

469

```

470

471

### Common HTML Elements

472

473

```java { .api }

474

/**

475

* HTML document structure elements

476

*/

477

public class HtmlHtml extends HtmlElement {

478

// Root HTML element

479

}

480

481

public class HtmlHead extends HtmlElement {

482

// Document head element

483

}

484

485

public class HtmlBody extends HtmlElement {

486

// Document body element

487

}

488

489

public class HtmlTitle extends HtmlElement {

490

// Document title element

491

}

492

493

/**

494

* Common content elements

495

*/

496

public class HtmlDivision extends HtmlElement {

497

// Division element (<div>)

498

}

499

500

public class HtmlSpan extends HtmlElement {

501

// Span element (<span>)

502

}

503

504

public class HtmlParagraph extends HtmlElement {

505

// Paragraph element (<p>)

506

}

507

508

public class HtmlHeading1 extends HtmlElement {

509

// H1 heading element

510

}

511

512

public class HtmlHeading2 extends HtmlElement {

513

// H2 heading element

514

}

515

516

public class HtmlHeading3 extends HtmlElement {

517

// H3 heading element

518

}

519

520

public class HtmlHeading4 extends HtmlElement {

521

// H4 heading element

522

}

523

524

public class HtmlHeading5 extends HtmlElement {

525

// H5 heading element

526

}

527

528

public class HtmlHeading6 extends HtmlElement {

529

// H6 heading element

530

}

531

532

/**

533

* List elements

534

*/

535

public class HtmlUnorderedList extends HtmlElement {

536

// <ul> element

537

}

538

539

public class HtmlOrderedList extends HtmlElement {

540

// <ol> element

541

}

542

543

public class HtmlListItem extends HtmlElement {

544

// <li> element

545

}

546

547

/**

548

* Text formatting elements

549

*/

550

public class HtmlEmphasis extends HtmlElement {

551

// <em> element

552

}

553

554

public class HtmlStrong extends HtmlElement {

555

// <strong> element

556

}

557

558

public class HtmlBold extends HtmlElement {

559

// <b> element

560

}

561

562

public class HtmlItalic extends HtmlElement {

563

// <i> element

564

}

565

```

566

567

### CSS Selector Support

568

569

HtmlUnit supports comprehensive CSS selector syntax:

570

571

**Basic Selectors:**

572

- Element: `div`, `p`, `span`

573

- Class: `.className`, `.class1.class2`

574

- ID: `#elementId`

575

- Attribute: `[attribute]`, `[attribute="value"]`, `[attribute^="prefix"]`

576

577

**Combinators:**

578

- Descendant: `div p` (p inside div)

579

- Child: `div > p` (p direct child of div)

580

- Adjacent sibling: `h1 + p` (p immediately after h1)

581

- General sibling: `h1 ~ p` (any p sibling that follows h1, not necessarily immediately)

582

583

**Pseudo-classes:**

584

- `:first-child`, `:last-child`, `:nth-child(n)`

585

- `:first-of-type`, `:last-of-type`, `:nth-of-type(n)`

586

- `:not(selector)`

587

- `:checked`, `:disabled`, `:enabled`

588

589

**Usage Example:**

590

591

```java

592

// Complex CSS selectors

593

List<HtmlElement> activeMenuItems = page.querySelectorAll(".menu .item.active");

594

HtmlElement firstTableCell = page.querySelector("table tr:first-child td:first-child");

595

List<HtmlElement> checkedInputs = page.querySelectorAll("input[type='checkbox']:checked");

596

HtmlElement submitButton = page.querySelector("form input[type='submit'], form button[type='submit']");

597

```