0
# Page and DOM Interaction
1
2
HTML page representation and DOM manipulation capabilities providing complete access to page structure, element selection, content extraction, and DOM navigation.
3
4
## Capabilities
5
6
### HTML Page Access
7
8
`HtmlPage` is the main entry point for interacting with HTML documents, providing methods for element selection and content extraction.
9
10
```java { .api }
11
public class HtmlPage extends SgmlPage {
12
/**
13
* Get an element by its ID attribute
14
* @param id the ID to search for
15
* @return DomElement with the specified ID, or null if not found
16
*/
17
public DomElement getElementById(String id);
18
19
/**
20
* Get all elements with the specified tag name (inherited from SgmlPage)
21
* @param name the tag name to search for
22
* @return DomNodeList of HtmlElement objects with matching tag names
23
*/
24
public DomNodeList<HtmlElement> getElementsByTagName(String name);
25
26
/**
27
* Get the page title text
28
* @return the text content of the title element
29
*/
30
public String getTitleText();
31
32
/**
33
* Get the page content as normalized text (without HTML tags)
34
* @return normalized text representation of the page
35
*/
36
public String asNormalizedText();
37
38
/**
39
* Get the page content as XML
40
* @return XML representation of the page
41
*/
42
public String asXml();
43
44
/**
45
* Get all forms on the page
46
* @return List of HtmlForm objects
47
*/
48
public List<HtmlForm> getForms();
49
50
/**
51
* Get all anchor/link elements on the page
52
* @return List of HtmlAnchor objects
53
*/
54
public List<HtmlAnchor> getAnchors();
55
56
/**
57
* Find a form by its name attribute
58
* @param name the name attribute value
59
* @return HtmlForm with matching name, or null if not found
60
*/
61
public HtmlForm getFormByName(String name);
62
63
/**
64
* Find an element by its name attribute
65
* @param name the name attribute value
66
* @return HtmlElement with matching name, or null if not found
67
*/
68
public HtmlElement getElementByName(String name);
69
70
/**
71
* Refresh the current page
72
* @throws IOException if refresh fails
73
*/
74
public void refresh() throws IOException;
75
76
/**
77
* Get the document element (root HTML element)
78
* @return the root HtmlElement of the document
79
*/
80
public HtmlElement getDocumentElement();
81
82
/**
83
* Find elements using XPath expressions
84
* @param xpathExpression the XPath expression to evaluate
85
* @return List of objects matching the XPath (may be nodes, strings, numbers, etc.)
86
*/
87
public List<?> getByXPath(String xpathExpression);
88
89
/**
90
* Find the first element using XPath expression
91
* @param xpathExpression the XPath expression to evaluate
92
* @return the first matching object, or null if no match
93
*/
94
public Object getFirstByXPath(String xpathExpression);
95
}
96
```
97
98
**Usage Examples:**
99
100
```java
101
HtmlPage page = webClient.getPage("https://example.com");
102
103
// Basic page information
104
System.out.println("Title: " + page.getTitleText());
105
System.out.println("Text content: " + page.asNormalizedText());
106
107
// Element selection
108
HtmlElement loginDiv = (HtmlElement) page.getElementById("login");
109
List<HtmlElement> paragraphs = page.getElementsByTagName("p");
110
111
// Form and link access
112
List<HtmlForm> forms = page.getForms();
113
List<HtmlAnchor> links = page.getAnchors();
114
115
// Find specific elements
116
HtmlForm loginForm = page.getFormByName("loginForm");
117
HtmlElement usernameField = page.getElementByName("username");
118
119
// XPath element selection
120
List<?> xpathResults = page.getByXPath("//div[@class='content']//p");
121
for (Object result : xpathResults) {
122
if (result instanceof HtmlElement) {
123
HtmlElement element = (HtmlElement) result;
124
System.out.println("Found: " + element.asNormalizedText());
125
}
126
}
127
128
// Get first element matching XPath
129
HtmlElement firstButton = (HtmlElement) page.getFirstByXPath("//button[@type='submit']");
130
if (firstButton != null) {
131
firstButton.click();
132
}
133
```
134
135
### DOM Node Navigation
136
137
Base DOM node functionality providing tree navigation and content access.
138
139
```java { .api }
140
public abstract class DomNode {
141
/**
142
* Get the parent node
143
* @return parent DomNode, or null if this is the root
144
*/
145
public DomNode getParentNode();
146
147
/**
148
* Get all child nodes
149
* @return DomNodeList containing all child nodes
150
*/
151
public DomNodeList<DomNode> getChildNodes();
152
153
/**
154
* Get the first child node
155
* @return first child DomNode, or null if no children
156
*/
157
public DomNode getFirstChild();
158
159
/**
160
* Get the last child node
161
* @return last child DomNode, or null if no children
162
*/
163
public DomNode getLastChild();
164
165
/**
166
* Get the next sibling node
167
* @return next sibling DomNode, or null if this is the last sibling
168
*/
169
public DomNode getNextSibling();
170
171
/**
172
* Get the previous sibling node
173
* @return previous sibling DomNode, or null if this is the first sibling
174
*/
175
public DomNode getPreviousSibling();
176
177
/**
178
* Get the node name (tag name for elements)
179
* @return the node name
180
*/
181
public String getNodeName();
182
183
/**
184
* Get the node value (text content for text nodes)
185
* @return the node value
186
*/
187
public String getNodeValue();
188
189
/**
190
* Get the text content of this node and all descendants
191
* @return combined text content
192
*/
193
public String getTextContent();
194
195
/**
196
* Remove this node from the DOM tree
197
*/
198
public void remove();
199
200
/**
201
* Get XML representation of this node
202
* @return XML string representation
203
*/
204
public String asXml();
205
}
206
```
207
208
**Usage Examples:**
209
210
```java
211
HtmlElement element = (HtmlElement) page.getElementById("content");
212
213
// Navigate the DOM tree
214
DomNode parent = element.getParentNode();
215
DomNodeList<DomNode> children = element.getChildNodes();
216
DomNode firstChild = element.getFirstChild();
217
DomNode nextSibling = element.getNextSibling();
218
219
// Extract content
220
String nodeName = element.getNodeName(); // "div"
221
String textContent = element.getTextContent();
222
223
// Modify DOM
224
element.remove(); // Remove element from page
225
```
226
227
### HTML Element Interaction
228
229
Base functionality for all HTML elements including attribute access, event simulation, and focus management.
230
231
```java { .api }
232
public abstract class HtmlElement extends DomElement {
233
/**
234
* Simulate a mouse click on this element
235
* @return the Page that loads as a result of the click
236
* @throws IOException if the click causes a navigation error
237
*/
238
public <P extends Page> P click() throws IOException;
239
240
/**
241
* Get an attribute value
242
* @param name the attribute name
243
* @return the attribute value, or empty string if not present
244
*/
245
public String getAttribute(String name);
246
247
/**
248
* Set an attribute value
249
* @param name the attribute name
250
* @param value the attribute value
251
*/
252
public void setAttribute(String name, String value);
253
254
/**
255
* Remove an attribute
256
* @param name the attribute name to remove
257
*/
258
public void removeAttribute(String name);
259
260
/**
261
* Check if an attribute exists
262
* @param name the attribute name to check
263
* @return true if the attribute exists
264
*/
265
public boolean hasAttribute(String name);
266
267
/**
268
* Get the element's ID attribute
269
* @return the ID value, or empty string if not set
270
*/
271
public String getId();
272
273
/**
274
* Set the element's ID attribute
275
* @param id the new ID value
276
*/
277
public void setId(String id);
278
279
/**
280
* Get the tag name (e.g., "div", "p", "input")
281
* @return the tag name in lowercase
282
*/
283
public String getTagName();
284
285
/**
286
* Set focus on this element
287
*/
288
public void focus();
289
290
/**
291
* Remove focus from this element
292
*/
293
public void blur();
294
295
/**
296
* Get all descendant elements with the specified tag name
297
* @param name the tag name to search for
298
* @return DomNodeList of HtmlElement objects with matching tag names
299
*/
300
public DomNodeList<HtmlElement> getElementsByTagName(String name);
301
302
/**
303
* Find elements using XPath expressions (inherited from DomNode)
304
* @param xpathExpression the XPath expression to evaluate
305
* @return List of objects matching the XPath
306
*/
307
public List<?> getByXPath(String xpathExpression);
308
309
/**
310
* Find the first element using XPath expression (inherited from DomNode)
311
* @param xpathExpression the XPath expression to evaluate
312
* @return the first matching object, or null if no match
313
*/
314
public Object getFirstByXPath(String xpathExpression);
315
316
/**
317
* Check if element is displayed (visible) on the page
318
* @return true if element is visible
319
*/
320
public boolean isDisplayed();
321
322
/**
323
* Get the element's offset height (including padding and border)
324
* @return height in pixels
325
*/
326
public int getOffsetHeight();
327
328
/**
329
* Get the element's offset width (including padding and border)
330
* @return width in pixels
331
*/
332
public int getOffsetWidth();
333
334
/**
335
* Get the element's client height (content + padding, excluding border)
336
* @return height in pixels
337
*/
338
public int getClientHeight();
339
340
/**
341
* Get the element's client width (content + padding, excluding border)
342
* @return width in pixels
343
*/
344
public int getClientWidth();
345
}
346
```
347
348
**Usage Examples:**
349
350
```java
351
HtmlElement button = (HtmlElement) page.getElementById("submitBtn");
352
353
// Element interaction
354
Page resultPage = button.click(); // Click the button
355
356
// Attribute manipulation
357
String className = button.getAttribute("class");
358
button.setAttribute("class", "btn btn-primary");
359
button.removeAttribute("disabled");
360
boolean hasId = button.hasAttribute("id");
361
362
// Focus management
363
button.focus(); // Give focus to element
364
button.blur(); // Remove focus
365
366
// Element identification
367
String tagName = button.getTagName(); // "button"
368
String id = button.getId();
369
370
// Find nested elements
371
DomNodeList<HtmlElement> nestedSpans = button.getElementsByTagName("span");
372
373
// XPath searches within element
374
List<?> childButtons = button.getByXPath(".//button");
375
HtmlElement firstChild = (HtmlElement) button.getFirstByXPath(".//*[@class='important']");
376
377
// Element visibility and dimensions
378
boolean isVisible = button.isDisplayed();
379
int elementHeight = button.getOffsetHeight();
380
int elementWidth = button.getOffsetWidth();
381
int contentHeight = button.getClientHeight();
382
int contentWidth = button.getClientWidth();
383
384
System.out.println("Element dimensions: " + elementWidth + "x" + elementHeight);
385
System.out.println("Content area: " + contentWidth + "x" + contentHeight);
386
System.out.println("Visible: " + isVisible);
387
```
388
389
### Anchor Link Interaction
390
391
Handle anchor/link elements with navigation and URL access capabilities.
392
393
```java { .api }
394
public class HtmlAnchor extends HtmlElement {
395
/**
396
* Click the link and navigate to the target
397
* @return the Page that loads as a result of clicking the link
398
* @throws IOException if navigation fails
399
*/
400
public <P extends Page> P click() throws IOException;
401
402
/**
403
* Get the href attribute value
404
* @return the href attribute value
405
*/
406
public String getHrefAttribute();
407
408
/**
409
* Get the href as a resolved URL object
410
* @return URL object representing the link target
411
* @throws MalformedURLException if the href is not a valid URL
412
*/
413
public URL getHrefAsLink() throws MalformedURLException;
414
415
/**
416
* Get the target attribute value
417
* @return the target attribute value (e.g., "_blank", "_self")
418
*/
419
public String getTarget();
420
}
421
```
422
423
**Usage Examples:**
424
425
```java
426
// Find links by text content
427
HtmlAnchor link = null;
428
for (HtmlAnchor anchor : page.getAnchors()) {
429
if ("Next Page".equals(anchor.getTextContent().trim())) {
430
link = anchor;
431
break;
432
}
433
}
434
435
if (link != null) {
436
// Get link information
437
String href = link.getHrefAttribute();
438
URL targetUrl = link.getHrefAsLink();
439
String target = link.getTarget();
440
441
// Navigate by clicking
442
HtmlPage nextPage = link.click();
443
}
444
```
445
446
### Image Element Access
447
448
Handle image elements with source and dimension information.
449
450
```java { .api }
451
public class HtmlImage extends HtmlElement {
452
/**
453
* Get the src attribute value
454
* @return the image source URL
455
*/
456
public String getSrcAttribute();
457
458
/**
459
* Get the alt attribute value
460
* @return the alternative text
461
*/
462
public String getAltAttribute();
463
464
/**
465
* Get the image width
466
* @return the width in pixels
467
*/
468
public int getWidth();
469
470
/**
471
* Get the image height
472
* @return the height in pixels
473
*/
474
public int getHeight();
475
}
476
```
477
478
**Usage Examples:**
479
480
```java
481
List<HtmlElement> images = page.getElementsByTagName("img");
482
for (HtmlElement element : images) {
483
if (element instanceof HtmlImage) {
484
HtmlImage img = (HtmlImage) element;
485
String src = img.getSrcAttribute();
486
String alt = img.getAltAttribute();
487
int width = img.getWidth();
488
int height = img.getHeight();
489
490
System.out.println("Image: " + src + " (" + width + "x" + height + ")");
491
}
492
}
493
```
494
495
### Table Element Navigation
496
497
Access table structure including rows, cells, headers, and table sections.
498
499
```java { .api }
500
public class HtmlTable extends HtmlElement {
501
/**
502
* Get all rows in the table
503
* @return List of HtmlTableRow objects
504
*/
505
public List<HtmlTableRow> getRows();
506
507
/**
508
* Get a specific row by index
509
* @param index the row index (0-based)
510
* @return HtmlTableRow at the specified index
511
*/
512
public HtmlTableRow getRow(int index);
513
514
/**
515
* Get the table header section
516
* @return HtmlTableHeader, or null if not present
517
*/
518
public HtmlTableHeader getHeader();
519
520
/**
521
* Get the table footer section
522
* @return HtmlTableFooter, or null if not present
523
*/
524
public HtmlTableFooter getFooter();
525
526
/**
527
* Get all table body sections
528
* @return List of HtmlTableBody objects
529
*/
530
public List<HtmlTableBody> getBodies();
531
}
532
533
public class HtmlTableRow extends HtmlElement {
534
/**
535
* Get all cells in this row
536
* @return List of HtmlTableCell objects
537
*/
538
public List<HtmlTableCell> getCells();
539
540
/**
541
* Get a specific cell by index
542
* @param index the cell index (0-based)
543
* @return HtmlTableCell at the specified index
544
*/
545
public HtmlTableCell getCell(int index);
546
}
547
548
public class HtmlTableCell extends HtmlElement {
549
// Inherits all HtmlElement methods for content access
550
}
551
```
552
553
**Usage Examples:**
554
555
```java
556
HtmlTable table = (HtmlTable) page.getElementById("dataTable");
557
558
// Access table structure
559
List<HtmlTableRow> rows = table.getRows();
560
HtmlTableHeader header = table.getHeader();
561
562
// Process table data
563
for (HtmlTableRow row : rows) {
564
List<HtmlTableCell> cells = row.getCells();
565
for (int i = 0; i < cells.size(); i++) {
566
HtmlTableCell cell = cells.get(i);
567
String cellText = cell.getTextContent().trim();
568
System.out.println("Cell[" + i + "]: " + cellText);
569
}
570
}
571
572
// Access specific cell
573
HtmlTableCell firstCell = table.getRow(0).getCell(0);
574
String cellContent = firstCell.getTextContent();
575
```