CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-htmlunit--htmlunit

A headless browser for Java programs that provides web automation, form handling, JavaScript execution, and DOM manipulation capabilities.

Pending
Overview
Eval results
Files

docs/page-dom.md

Page and DOM Interaction

HTML page representation and DOM manipulation capabilities providing complete access to page structure, element selection, content extraction, and DOM navigation.

Capabilities

HTML Page Access

The main interface for interacting with HTML documents, providing methods for element selection and content extraction.

/**
 * Reference listing of the HTML page API: element lookup, content extraction,
 * form/anchor access, page refresh, and XPath queries.
 * Signatures only; method bodies are omitted in this listing.
 */
public class HtmlPage extends SgmlPage {
    /**
     * Get an element by its ID attribute.
     * The declared return type is DomElement, so callers that need an
     * HtmlElement (or a subtype) must cast the result.
     * @param id the ID to search for
     * @return DomElement with the specified ID, or null if not found
     */
    public DomElement getElementById(String id);
    
    /**
     * Get all elements on the page with the specified tag name
     * NOTE(review): HtmlUnit's Javadoc lists the HtmlPage variant of this method as
     * returning DomNodeList<DomElement> - confirm against the targeted version.
     * @param name the tag name to search for
     * @return DomNodeList of HtmlElement objects with matching tag names
     */
    public DomNodeList<HtmlElement> getElementsByTagName(String name);
    
    /**
     * Get the page title text
     * @return the text content of the title element
     */
    public String getTitleText();
    
    /**
     * Get the page content as normalized text (without HTML tags)
     * @return normalized text representation of the page
     */
    public String asNormalizedText();
    
    /**
     * Get the page content as XML
     * @return XML representation of the page
     */
    public String asXml();
    
    /**
     * Get all forms on the page
     * @return List of HtmlForm objects
     */
    public List<HtmlForm> getForms();
    
    /**
     * Get all anchor/link elements on the page
     * @return List of HtmlAnchor objects
     */
    public List<HtmlAnchor> getAnchors();
    
    /**
     * Find a form by its name attribute
     * NOTE(review): HtmlUnit's Javadoc documents an ElementNotFoundException when no
     * form matches, rather than a null return - confirm before relying on null checks.
     * @param name the name attribute value
     * @return HtmlForm with matching name, or null if not found
     */
    public HtmlForm getFormByName(String name);
    
    /**
     * Find an element by its name attribute
     * NOTE(review): HtmlUnit's Javadoc documents an ElementNotFoundException when no
     * element matches, rather than a null return - confirm before relying on null checks.
     * @param name the name attribute value
     * @return HtmlElement with matching name, or null if not found
     */
    public HtmlElement getElementByName(String name);
    
    /**
     * Refresh the current page
     * @throws IOException if refresh fails
     */
    public void refresh() throws IOException;
    
    /**
     * Get the document element (root HTML element)
     * @return the root HtmlElement of the document
     */
    public HtmlElement getDocumentElement();
    
    /**
     * Find elements using XPath expressions.
     * The result list is untyped, so callers must check each entry's type before casting.
     * @param xpathExpression the XPath expression to evaluate
     * @return List of objects matching the XPath (may be nodes, strings, numbers, etc.)
     */
    public List<?> getByXPath(String xpathExpression);
    
    /**
     * Find the first element using XPath expression
     * @param xpathExpression the XPath expression to evaluate
     * @return the first matching object, or null if no match
     */
    public Object getFirstByXPath(String xpathExpression);
}

Usage Examples:

// Load the page; webClient is an existing WebClient instance
HtmlPage page = webClient.getPage("https://example.com");

// Basic page information
System.out.println("Title: " + page.getTitleText());
System.out.println("Text content: " + page.asNormalizedText());

// Element selection
// getElementById is declared to return DomElement, so narrow it explicitly;
// the result is null when no element carries the given ID
HtmlElement loginDiv = (HtmlElement) page.getElementById("login");
List<HtmlElement> paragraphs = page.getElementsByTagName("p");

// Form and link access
List<HtmlForm> forms = page.getForms();
List<HtmlAnchor> links = page.getAnchors();

// Find specific elements
HtmlForm loginForm = page.getFormByName("loginForm");
HtmlElement usernameField = page.getElementByName("username");

// XPath element selection
// Results are untyped (nodes, strings, numbers, ...), so filter before casting
List<?> xpathResults = page.getByXPath("//div[@class='content']//p");
for (Object result : xpathResults) {
    if (result instanceof HtmlElement) {
        HtmlElement element = (HtmlElement) result;
        System.out.println("Found: " + element.asNormalizedText());
    }
}

// Get first element matching XPath; null means nothing matched
HtmlElement firstButton = (HtmlElement) page.getFirstByXPath("//button[@type='submit']");
if (firstButton != null) {
    firstButton.click();
}

DOM Node Navigation

Base DOM node functionality providing tree navigation and content access.

/**
 * Reference listing of the base DOM node API: tree navigation (parent, children,
 * siblings), content access, removal, and XML serialization.
 * Signatures only; method bodies are omitted in this listing.
 */
public abstract class DomNode {
    /**
     * Get the parent node
     * @return parent DomNode, or null if this is the root
     */
    public DomNode getParentNode();
    
    /**
     * Get all child nodes
     * @return DomNodeList containing all child nodes
     */
    public DomNodeList<DomNode> getChildNodes();
    
    /**
     * Get the first child node
     * @return first child DomNode, or null if no children
     */
    public DomNode getFirstChild();
    
    /**
     * Get the last child node
     * @return last child DomNode, or null if no children
     */
    public DomNode getLastChild();
    
    /**
     * Get the next sibling node
     * @return next sibling DomNode, or null if this is the last sibling
     */
    public DomNode getNextSibling();
    
    /**
     * Get the previous sibling node
     * @return previous sibling DomNode, or null if this is the first sibling
     */
    public DomNode getPreviousSibling();
    
    /**
     * Get the node name (tag name for elements)
     * @return the node name
     */
    public String getNodeName();
    
    /**
     * Get the node value (text content for text nodes)
     * @return the node value
     */
    public String getNodeValue();
    
    /**
     * Get the text content of this node and all descendants
     * @return combined text content
     */
    public String getTextContent();
    
    /**
     * Remove this node from the DOM tree
     */
    public void remove();
    
    /**
     * Get XML representation of this node
     * @return XML string representation
     */
    public String asXml();
}

Usage Examples:

// getElementById is declared to return DomElement, so narrow it explicitly
HtmlElement element = (HtmlElement) page.getElementById("content");

// Navigate the DOM tree
DomNode parent = element.getParentNode();
DomNodeList<DomNode> children = element.getChildNodes();
DomNode firstChild = element.getFirstChild();
DomNode nextSibling = element.getNextSibling();

// Extract content
String nodeName = element.getNodeName(); // "div"
String textContent = element.getTextContent();

// Modify DOM
element.remove(); // Remove element from page

HTML Element Interaction

Base functionality for all HTML elements including attribute access, event simulation, and focus management.

/**
 * Reference listing of the API shared by HTML elements: click simulation,
 * attribute access, focus handling, descendant lookup, XPath queries, and
 * visibility/size accessors.
 * Signatures only; method bodies are omitted in this listing.
 */
public abstract class HtmlElement extends DomElement {
    /**
     * Simulate a mouse click on this element
     * @param <P> the page type the caller expects as the result of the click
     * @return the Page that loads as a result of the click
     * @throws IOException if the click causes a navigation error
     */
    public <P extends Page> P click() throws IOException;
    
    /**
     * Get an attribute value
     * @param name the attribute name
     * @return the attribute value, or empty string if not present
     */
    public String getAttribute(String name);
    
    /**
     * Set an attribute value
     * @param name the attribute name
     * @param value the attribute value
     */
    public void setAttribute(String name, String value);
    
    /**
     * Remove an attribute
     * @param name the attribute name to remove
     */
    public void removeAttribute(String name);
    
    /**
     * Check if an attribute exists
     * @param name the attribute name to check
     * @return true if the attribute exists
     */
    public boolean hasAttribute(String name);
    
    /**
     * Get the element's ID attribute
     * @return the ID value, or empty string if not set
     */
    public String getId();
    
    /**
     * Set the element's ID attribute
     * @param id the new ID value
     */
    public void setId(String id);
    
    /**
     * Get the tag name (e.g., "div", "p", "input")
     * @return the tag name in lowercase
     */
    public String getTagName();
    
    /**
     * Set focus on this element
     */
    public void focus();
    
    /**
     * Remove focus from this element
     */
    public void blur();
    
    /**
     * Get all descendant elements with the specified tag name
     * @param name the tag name to search for
     * @return DomNodeList of HtmlElement objects with matching tag names
     */
    public DomNodeList<HtmlElement> getElementsByTagName(String name);
    
    /**
     * Find elements using XPath expressions (inherited from DomNode).
     * The result list is untyped, so callers must check each entry's type before casting.
     * @param xpathExpression the XPath expression to evaluate
     * @return List of objects matching the XPath
     */
    public List<?> getByXPath(String xpathExpression);
    
    /**
     * Find the first element using XPath expression (inherited from DomNode)
     * @param xpathExpression the XPath expression to evaluate
     * @return the first matching object, or null if no match
     */
    public Object getFirstByXPath(String xpathExpression);
    
    /**
     * Check if element is displayed (visible) on the page
     * @return true if element is visible
     */
    public boolean isDisplayed();
    
    /**
     * Get the element's offset height (including padding and border)
     * NOTE(review): confirm that this and the other three size accessors below exist
     * on the Java HtmlElement class in the targeted HtmlUnit version; layout metrics
     * may only be exposed through the JavaScript host objects.
     * @return height in pixels
     */
    public int getOffsetHeight();
    
    /**
     * Get the element's offset width (including padding and border)
     * @return width in pixels
     */
    public int getOffsetWidth();
    
    /**
     * Get the element's client height (content + padding, excluding border)
     * @return height in pixels
     */
    public int getClientHeight();
    
    /**
     * Get the element's client width (content + padding, excluding border)
     * @return width in pixels
     */
    public int getClientWidth();
}

Usage Examples:

// getElementById is declared to return DomElement, so narrow it explicitly
HtmlElement button = (HtmlElement) page.getElementById("submitBtn");

// Element interaction
Page resultPage = button.click(); // Click the button

// Attribute manipulation
String className = button.getAttribute("class");
button.setAttribute("class", "btn btn-primary");
button.removeAttribute("disabled");
boolean hasId = button.hasAttribute("id");

// Focus management
button.focus(); // Give focus to element
button.blur();  // Remove focus

// Element identification
String tagName = button.getTagName(); // "button"
String id = button.getId();

// Find nested elements
// The element type must match the declared DomNodeList<HtmlElement> return type
DomNodeList<HtmlElement> nestedSpans = button.getElementsByTagName("span");

// XPath searches within element (the leading "." keeps the query relative to button)
List<?> childButtons = button.getByXPath(".//button");
HtmlElement firstChild = (HtmlElement) button.getFirstByXPath(".//*[@class='important']");

// Element visibility and dimensions
boolean isVisible = button.isDisplayed();
int elementHeight = button.getOffsetHeight();
int elementWidth = button.getOffsetWidth();
int contentHeight = button.getClientHeight();
int contentWidth = button.getClientWidth();

System.out.println("Element dimensions: " + elementWidth + "x" + elementHeight);
System.out.println("Content area: " + contentWidth + "x" + contentHeight);
System.out.println("Visible: " + isVisible);

Anchor Link Interaction

Handle anchor/link elements with navigation and URL access capabilities.

/**
 * Reference listing of the anchor (link) element API: click navigation and
 * access to the href and target attributes.
 * Signatures only; method bodies are omitted in this listing.
 */
public class HtmlAnchor extends HtmlElement {
    /**
     * Click the link and navigate to the target
     * @param <P> the page type the caller expects as the result of the click
     * @return the Page that loads as a result of clicking the link
     * @throws IOException if navigation fails
     */
    public <P extends Page> P click() throws IOException;
    
    /**
     * Get the href attribute value
     * @return the href attribute value
     */
    public String getHrefAttribute();
    
    /**
     * Get the href as a resolved URL object
     * NOTE(review): confirm this accessor name against the HtmlAnchor Javadoc
     * of the targeted HtmlUnit version.
     * @return URL object representing the link target
     * @throws MalformedURLException if the href is not a valid URL
     */
    public URL getHrefAsLink() throws MalformedURLException;
    
    /**
     * Get the target attribute value
     * NOTE(review): HtmlUnit's Javadoc names the attribute getter getTargetAttribute();
     * confirm that getTarget() exists in the targeted version.
     * @return the target attribute value (e.g., "_blank", "_self")
     */
    public String getTarget();
}

Usage Examples:

// Locate the first anchor whose trimmed text is exactly "Next Page" (null if none)
HtmlAnchor link = page.getAnchors().stream()
        .filter(anchor -> "Next Page".equals(anchor.getTextContent().trim()))
        .findFirst()
        .orElse(null);

// Only inspect and follow the link when one was found
if (link != null) {
    // Read the link's raw href, resolved URL, and target
    String href = link.getHrefAttribute();
    URL targetUrl = link.getHrefAsLink();
    String target = link.getTarget();
    
    // Follow the link; click() yields the page that loads as a result
    HtmlPage nextPage = link.click();
}

Image Element Access

Handle image elements with source and dimension information.

/**
 * Reference listing of the image element API: src/alt attribute access and
 * image dimensions.
 * Signatures only; method bodies are omitted in this listing.
 */
public class HtmlImage extends HtmlElement {
    /**
     * Get the src attribute value
     * @return the image source URL
     */
    public String getSrcAttribute();
    
    /**
     * Get the alt attribute value
     * @return the alternative text
     */
    public String getAltAttribute();
    
    /**
     * Get the image width
     * NOTE(review): HtmlUnit's Javadoc declares getWidth()/getHeight() with
     * "throws IOException" because the image data may have to be loaded - confirm
     * and add exception handling in callers if so.
     * @return the width in pixels
     */
    public int getWidth();
    
    /**
     * Get the image height
     * @return the height in pixels
     */
    public int getHeight();
}

Usage Examples:

// Gather every <img> element on the page
List<HtmlElement> images = page.getElementsByTagName("img");
for (HtmlElement candidate : images) {
    // Skip anything that is not represented by HtmlImage
    if (!(candidate instanceof HtmlImage)) {
        continue;
    }

    HtmlImage image = (HtmlImage) candidate;
    String src = image.getSrcAttribute();
    String alt = image.getAltAttribute();
    int width = image.getWidth();
    int height = image.getHeight();

    // Report the source together with its "WIDTHxHEIGHT" dimensions
    String dimensions = width + "x" + height;
    System.out.println("Image: " + src + " (" + dimensions + ")");
}

Table Element Navigation

Access table structure including rows, cells, headers, and table sections.

/**
 * Reference listing of the table element API: access to rows and to the
 * header, footer, and body sections.
 * Signatures only; method bodies are omitted in this listing.
 */
public class HtmlTable extends HtmlElement {
    /**
     * Get all rows in the table
     * @return List of HtmlTableRow objects
     */
    public List<HtmlTableRow> getRows();
    
    /**
     * Get a specific row by index
     * NOTE(review): behavior for an out-of-range index is not stated here - confirm
     * against the HtmlTable Javadoc.
     * @param index the row index (0-based)
     * @return HtmlTableRow at the specified index
     */
    public HtmlTableRow getRow(int index);
    
    /**
     * Get the table header section
     * @return HtmlTableHeader, or null if not present
     */
    public HtmlTableHeader getHeader();
    
    /**
     * Get the table footer section
     * @return HtmlTableFooter, or null if not present
     */
    public HtmlTableFooter getFooter();
    
    /**
     * Get all table body sections
     * @return List of HtmlTableBody objects
     */
    public List<HtmlTableBody> getBodies();
}

/**
 * Reference listing of the table row API: access to the row's cells.
 * Signatures only; method bodies are omitted in this listing.
 */
public class HtmlTableRow extends HtmlElement {
    /**
     * Get all cells in this row
     * @return List of HtmlTableCell objects
     */
    public List<HtmlTableCell> getCells();
    
    /**
     * Get a specific cell by index
     * NOTE(review): behavior for an out-of-range index is not stated here - confirm
     * against the HtmlTableRow Javadoc.
     * @param index the cell index (0-based)
     * @return HtmlTableCell at the specified index
     */
    public HtmlTableCell getCell(int index);
}

/**
 * Table cell element. This listing declares no cell-specific members; content
 * is read through the methods inherited from HtmlElement.
 */
public class HtmlTableCell extends HtmlElement {
    // Inherits all HtmlElement methods for content access
}

Usage Examples:

// Look up the table by ID and narrow it to HtmlTable
HtmlTable table = (HtmlTable) page.getElementById("dataTable");

// Fetch the rows and the header section
List<HtmlTableRow> rows = table.getRows();
HtmlTableHeader header = table.getHeader();

// Print every cell of every row, labelled with its 0-based column index
for (HtmlTableRow row : rows) {
    int column = 0;
    for (HtmlTableCell cell : row.getCells()) {
        String cellText = cell.getTextContent().trim();
        System.out.println("Cell[" + column + "]: " + cellText);
        column++;
    }
}

// Read one specific cell: first row, first column
HtmlTableRow firstRow = table.getRow(0);
HtmlTableCell firstCell = firstRow.getCell(0);
String cellContent = firstCell.getTextContent();

Install with Tessl CLI

npx tessl i tessl/maven-org-htmlunit--htmlunit

docs

cookies.md

exceptions.md

forms.md

http.md

index.md

javascript.md

page-dom.md

web-client.md

windows.md

tile.json