CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/maven-net-sourceforge-htmlunit--htmlunit

A headless browser intended for use in testing web-based applications.

Pending
Overview
Eval results
Files

docs/html-dom.md

HTML DOM Manipulation

Comprehensive HTML DOM access and manipulation capabilities including element lookup, CSS selectors, XPath queries, and DOM tree navigation. Essential for web scraping and automated testing.

Capabilities

HtmlPage Class

Primary interface for HTML page manipulation and content extraction.

/**
 * HTML page with full DOM manipulation support.
 *
 * Extends SgmlPage and groups the page-level entry points: element lookup
 * (by ID, name, tag, class, CSS selector, XPath), typed accessors for forms
 * and anchors, content extraction, and JavaScript execution.
 */
public class HtmlPage extends SgmlPage {
    /**
     * Get element by ID attribute.
     * NOTE(review): the usage example null-checks the result, so this
     * presumably returns null when no element matches; confirm.
     */
    public HtmlElement getElementById(String id);
    
    /** Get all elements with the given tag name */
    public List<HtmlElement> getElementsByTagName(String tagName);
    
    /** Get all elements whose name attribute equals the given name */
    public List<HtmlElement> getElementsByName(String name);
    
    /** Get all elements carrying the given CSS class name */
    public List<HtmlElement> getElementsByClassName(String className);
    
    /** Get first element matching CSS selector */
    public HtmlElement querySelector(String selectors);
    
    /** Get all elements matching CSS selector */
    public List<HtmlElement> querySelectorAll(String selectors);
    
    /**
     * Get first node matching XPath expression.
     * Declared to return DomNode, so callers that need an element must cast.
     */
    public DomNode getFirstByXPath(String xpathExpr);
    
    /**
     * Get all nodes matching XPath expression.
     * The list is untyped (List<?>); callers cast items to the expected type.
     */
    public List<?> getByXPath(String xpathExpr);
    
    /** Get element by name attribute (first match only) */
    public HtmlElement getElementByName(String name);
    
    /** Get element by ID, typed to the HtmlElement subtype the caller expects */
    public <T extends HtmlElement> T getHtmlElementById(String id);
    
    /** Get all forms on the page */
    public List<HtmlForm> getForms();
    
    /** Get form by name attribute */
    public HtmlForm getFormByName(String name);
    
    /** Get all anchor elements */
    public List<HtmlAnchor> getAnchors();
    
    /** Get anchor by name attribute */
    public HtmlAnchor getAnchorByName(String name);
    
    /** Get anchor by href attribute */
    public HtmlAnchor getAnchorByHref(String href);
    
    /** Get page title text */
    public String getTitleText();
    
    /** Get page content as plain text */
    public String asText();
    
    /** Get page content as XML string */
    public String asXml();
    
    /** Get head element */
    public HtmlHead getHead();
    
    /** Get body element */
    public HtmlBody getBody();
    
    /** Get document element (html) */
    public HtmlHtml getDocumentElement();
    
    /** Execute JavaScript code in page context */
    public ScriptResult executeJavaScript(String sourceCode);
    
    /**
     * Execute JavaScript with source location info.
     * sourceName and startLine identify where the script text came from.
     */
    public ScriptResult executeJavaScript(String sourceCode, String sourceName, int startLine);
    
    /**
     * Click anywhere on the page.
     * The resulting page is returned typed as the Page subtype the caller assigns it to.
     */
    public <P extends Page> P click() throws IOException;
    
    /** Get character set encoding */
    public Charset getCharset();
    
    /** Refresh the page */
    public void refresh() throws IOException;
    
    /** Check whether the given page feature is present */
    public boolean hasFeature(PageFeature feature);
}

Usage Examples:

import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlElement;

import java.util.List;

// WebClient is used in try-with-resources so it is closed when the block exits.
try (WebClient webClient = new WebClient()) {
    HtmlPage page = webClient.getPage("https://example.com");
    
    // Element lookup by ID; guard against the element being absent before clicking.
    // click() is declared to throw IOException, so run this inside a method that
    // declares or handles it.
    HtmlElement loginButton = page.getElementById("login-btn");
    if (loginButton != null) {
        loginButton.click();
    }
    
    // CSS selector queries: first match vs. all matches
    HtmlElement navbar = page.querySelector(".navbar");
    List<HtmlElement> menuItems = page.querySelectorAll(".menu-item");
    
    // XPath queries: getByXPath returns an untyped list, and getFirstByXPath is
    // declared to return DomNode, hence the cast to HtmlElement.
    List<?> allLinks = page.getByXPath("//a[@href]");
    HtmlElement firstParagraph = (HtmlElement) page.getFirstByXPath("//p[1]");
    
    // Content extraction
    String pageTitle = page.getTitleText();
    String pageText = page.asText();
    String pageHtml = page.asXml();
    
    System.out.println("Title: " + pageTitle);
    System.out.println("Found " + menuItems.size() + " menu items");
}

DomNode Class

Base class for all DOM nodes providing tree navigation and manipulation.

/**
 * Base class for all DOM nodes.
 *
 * Provides tree navigation (parent, children, siblings), structural edits
 * (append, insert, replace, remove, clone), text/XML serialization, and
 * XPath lookup relative to the node.
 */
public abstract class DomNode implements Cloneable {
    /** Get parent node */
    public DomNode getParentNode();
    
    /** Get all child nodes */
    public DomNodeList<DomNode> getChildNodes();
    
    /** Get first child node */
    public DomNode getFirstChild();
    
    /** Get last child node */
    public DomNode getLastChild();
    
    /** Get next sibling node */
    public DomNode getNextSibling();
    
    /** Get previous sibling node */
    public DomNode getPreviousSibling();
    
    /** Get node name */
    public String getNodeName();
    
    /** Get node value */
    public String getNodeValue();
    
    /** Set node value */
    public void setNodeValue(String nodeValue);
    
    /** Get DOM node type constant (a short, see the "DOM Node Types" listing) */
    public short getNodeType();
    
    /** Append child node */
    public DomNode appendChild(DomNode node);
    
    /** Insert newChild before the reference node refChild */
    public DomNode insertBefore(DomNode newChild, DomNode refChild);
    
    /** Replace oldChild with newChild */
    public DomNode replaceChild(DomNode newChild, DomNode oldChild);
    
    /** Remove the given child node */
    public DomNode removeChild(DomNode child);
    
    /** Remove this node from its parent */
    public void remove();
    
    /** Clone node; pass true for a deep copy, false for a shallow one */
    public DomNode cloneNode(boolean deep);
    
    /** Check if node has child nodes */
    public boolean hasChildNodes();
    
    /** Get text content of node and descendants */
    public String getTextContent();
    
    /** Set text content */
    public void setTextContent(String textContent);
    
    /** Get node as text representation */
    public String asText();
    
    /** Get node as XML string */
    public String asXml();
    
    /** Get containing page */
    public SgmlPage getPage();
    
    /**
     * Get containing document.
     * NOTE(review): the return type is listed as DomDocumentFragment, which
     * does not obviously match "containing document"; verify against the
     * library's Javadoc before relying on it.
     */
    public DomDocumentFragment getOwnerDocument();
    
    /** Get nodes matching XPath expression (untyped list; callers cast items) */
    public List<?> getByXPath(String xpathExpr);
    
    /** Get canonical XPath to this node */
    public String getCanonicalXPath();
    
    /** Check if this node is an ancestor of the given node */
    public boolean isAncestorOf(DomNode node);
    
    /** Check if this node is a descendant of the given node */
    public boolean isDescendantOf(DomNode node);
}

DomElement Class

Base class for HTML/XML elements with attribute management.

/**
 * Base class for HTML and XML elements.
 *
 * Extends DomNode, so the tree navigation methods listed there are also
 * available. Adds attribute access (plain and namespaced), ID and CSS class
 * helpers, descendant lookup by tag name, mouse/focus interaction, and
 * style and selector queries.
 */
public class DomElement extends DomNode {
    /** Get element tag name */
    public String getTagName();
    
    /** Get attribute value */
    public String getAttribute(String attributeName);
    
    /** Get attribute value with namespace */
    public String getAttributeNS(String namespaceURI, String localName);
    
    /** Set attribute value */
    public void setAttribute(String attributeName, String attributeValue);
    
    /** Set attribute with namespace (takes the qualified name, not the local name) */
    public void setAttributeNS(String namespaceURI, String qualifiedName, String attributeValue);
    
    /** Remove attribute */
    public void removeAttribute(String attributeName);
    
    /** Remove attribute with namespace */
    public void removeAttributeNS(String namespaceURI, String localName);
    
    /** Check if attribute exists */
    public boolean hasAttribute(String attributeName);
    
    /** Check if namespaced attribute exists */
    public boolean hasAttributeNS(String namespaceURI, String localName);
    
    /** Get all attribute names */
    public Iterable<String> getAttributeNames();
    
    /** Get element ID */
    public String getId();
    
    /** Set element ID */
    public void setId(String id);
    
    /** Get CSS class names as a single String */
    public String getClassName();
    
    /** Set CSS class names from a single String */
    public void setClassName(String className);
    
    /** Add CSS class */
    public void addClass(String className);
    
    /** Remove CSS class */
    public void removeClass(String className);
    
    /** Check if element has the given CSS class (distinct from hasAttribute("class")) */
    public boolean hasClass(String className);
    
    /** Get elements by tag name (descendants) */
    public DomNodeList<DomElement> getElementsByTagName(String name);
    
    /** Get elements by tag name with namespace */
    public DomNodeList<DomElement> getElementsByTagNameNS(String namespaceURI, String localName);
    
    /** Click the element; the resulting page is typed by the caller */
    public <P extends Page> P click() throws IOException;
    
    /** Double-click the element */
    public <P extends Page> P dblClick() throws IOException;
    
    /** Right-click the element */
    public <P extends Page> P rightClick() throws IOException;
    
    /** Focus the element */
    public void focus();
    
    /** Remove focus from element */
    public void blur();
    
    /** Check if element is displayed */
    public boolean isDisplayed();
    
    /** Get element's computed style */
    public ComputedCSSStyleDeclaration getComputedStyle();
    
    /** Get element's inline style */
    public ElementCssStyleDeclaration getStyle();
    
    /** Check if element matches CSS selector */
    public boolean matches(String selector);
    
    /** Get closest ancestor matching selector */
    public DomElement closest(String selector);
}

Usage Examples:

import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.DomNode;
import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

// `page` is an HtmlPage obtained from WebClient.getPage(...).

// Element attribute manipulation
HtmlElement element = page.getElementById("myElement");
element.setAttribute("data-value", "123");
String value = element.getAttribute("data-value");
// hasAttribute("class") only tests that a class attribute exists;
// use hasClass(...) below to test for a specific class name.
boolean hasClassAttribute = element.hasAttribute("class");

// CSS class manipulation
element.addClass("active");
element.removeClass("disabled");
boolean isActive = element.hasClass("active");

// DOM tree navigation (methods inherited from DomNode)
DomNode parent = element.getParentNode();
DomNodeList<DomNode> children = element.getChildNodes();
DomNode nextSibling = element.getNextSibling();

// Element interaction: click() is declared to throw IOException
if (element.isDisplayed()) {
    HtmlPage newPage = element.click();
}

HtmlElement Class

Specialized HTML element with additional HTML-specific functionality.

/**
 * Base class for HTML elements with interaction capabilities.
 *
 * Extends DomElement with getters for common HTML and event-handler
 * attributes (each returned as a String), keyboard simulation, and
 * layout/scroll queries.
 */
public abstract class HtmlElement extends DomElement {
    /** Get element's lang attribute */
    public String getLangAttribute();
    
    /** Get element's dir attribute */
    public String getDirAttribute();
    
    /** Get element's title attribute */
    public String getTitleAttribute();
    
    /** Get element's tabindex attribute (as a String, not a number) */
    public String getTabIndexAttribute();
    
    /** Get element's onclick attribute */
    public String getOnClickAttribute();
    
    /** Get element's ondblclick attribute */
    public String getOnDblClickAttribute();
    
    /** Get element's onmousedown attribute */
    public String getOnMouseDownAttribute();
    
    /** Get element's onmouseup attribute */
    public String getOnMouseUpAttribute();
    
    /** Get element's onmouseover attribute */
    public String getOnMouseOverAttribute();
    
    /** Get element's onmousemove attribute */
    public String getOnMouseMoveAttribute();
    
    /** Get element's onmouseout attribute */
    public String getOnMouseOutAttribute();
    
    /** Get element's onkeypress attribute */
    public String getOnKeyPressAttribute();
    
    /** Get element's onkeydown attribute */
    public String getOnKeyDownAttribute();
    
    /** Get element's onkeyup attribute */
    public String getOnKeyUpAttribute();
    
    /** Type text into element (fires keyboard events) */
    public void type(String text) throws IOException;
    
    /** Type single character */
    public void type(char c) throws IOException;
    
    /** Simulate pressing down the key with the given key code */
    public void keyDown(int keyCode);
    
    /** Simulate releasing the key with the given key code */
    public void keyUp(int keyCode);
    
    /** Get element's bounding rectangle */
    public Rectangle getBoundingClientRect();
    
    /** Scroll element into view */
    public void scrollIntoView();
    
    /** Check if element is enabled */
    public boolean isEnabled();
    
    /** Get element's offset parent */
    public HtmlElement getOffsetParent();
    
    /** Get offset dimensions (height, width) and offset position (left, top) */
    public int getOffsetHeight();
    public int getOffsetWidth();
    public int getOffsetLeft();
    public int getOffsetTop();
    
    /** Get client dimensions (height, width) and client position (left, top) */
    public int getClientHeight();
    public int getClientWidth();
    public int getClientLeft();
    public int getClientTop();
    
    /** Get scroll dimensions (height, width) and current scroll position (left, top) */
    public int getScrollHeight();
    public int getScrollWidth();
    public int getScrollLeft();
    public int getScrollTop();
    
    /** Set scroll position (horizontal via scrollLeft, vertical via scrollTop) */
    public void setScrollLeft(int scrollLeft);
    public void setScrollTop(int scrollTop);
}

DOM Node Types

/**
 * DOM node type constants (from DOM specification).
 *
 * These are short values, the same type DomNode.getNodeType() is declared
 * to return, so a node's type can be compared against them directly.
 */
public interface Node {
    public static final short ELEMENT_NODE = 1;
    public static final short ATTRIBUTE_NODE = 2;
    public static final short TEXT_NODE = 3;
    public static final short CDATA_SECTION_NODE = 4;
    public static final short ENTITY_REFERENCE_NODE = 5;
    public static final short ENTITY_NODE = 6;
    public static final short PROCESSING_INSTRUCTION_NODE = 7;
    public static final short COMMENT_NODE = 8;
    public static final short DOCUMENT_NODE = 9;
    public static final short DOCUMENT_TYPE_NODE = 10;
    public static final short DOCUMENT_FRAGMENT_NODE = 11;
    public static final short NOTATION_NODE = 12;
}

Common HTML Elements

/**
 * HTML document structure elements.
 * Each is an HtmlElement subclass; the listing shows no additional members.
 */
public class HtmlHtml extends HtmlElement {
    // Root HTML element (<html>)
}

public class HtmlHead extends HtmlElement {
    // Document head element (<head>)
}

public class HtmlBody extends HtmlElement {
    // Document body element (<body>)
}

public class HtmlTitle extends HtmlElement {
    // Document title element (<title>)
}

/**
 * Common content elements.
 * Each is an HtmlElement subclass; the listing shows no additional members.
 */
public class HtmlDivision extends HtmlElement {
    // Division element (<div>)
}

public class HtmlSpan extends HtmlElement {
    // Span element (<span>)
}

public class HtmlParagraph extends HtmlElement {
    // Paragraph element (<p>)
}

public class HtmlHeading1 extends HtmlElement {
    // H1 heading element (<h1>)
}

public class HtmlHeading2 extends HtmlElement {
    // H2 heading element (<h2>)
}

public class HtmlHeading3 extends HtmlElement {
    // H3 heading element (<h3>)
}

public class HtmlHeading4 extends HtmlElement {
    // H4 heading element (<h4>)
}

public class HtmlHeading5 extends HtmlElement {
    // H5 heading element (<h5>)
}

public class HtmlHeading6 extends HtmlElement {
    // H6 heading element (<h6>)
}

/**
 * List elements.
 * Each is an HtmlElement subclass; the listing shows no additional members.
 */
public class HtmlUnorderedList extends HtmlElement {
    // Unordered list element (<ul>)
}

public class HtmlOrderedList extends HtmlElement {
    // Ordered list element (<ol>)
}

public class HtmlListItem extends HtmlElement {
    // List item element (<li>)
}

/**
 * Text formatting elements.
 * Each is an HtmlElement subclass; the listing shows no additional members.
 */
public class HtmlEmphasis extends HtmlElement {
    // Emphasis element (<em>)
}

public class HtmlStrong extends HtmlElement {
    // Strong element (<strong>)
}

public class HtmlBold extends HtmlElement {
    // Bold element (<b>)
}

public class HtmlItalic extends HtmlElement {
    // Italic element (<i>)
}

CSS Selector Support

HtmlUnit supports comprehensive CSS selector syntax:

Basic Selectors:

  • Element: div, p, span
  • Class: .className, .class1.class2
  • ID: #elementId
  • Attribute: [attribute], [attribute="value"], [attribute^="prefix"]

Combinators:

  • Descendant: div p (p inside div)
  • Child: div > p (p direct child of div)
  • Adjacent sibling: h1 + p (p immediately after h1)
  • General sibling: h1 ~ p (p sibling after h1)

Pseudo-classes:

  • :first-child, :last-child, :nth-child(n)
  • :first-of-type, :last-of-type, :nth-of-type(n)
  • :not(selector)
  • :checked, :disabled, :enabled

Usage Example:

// Complex CSS selectors
// Descendant combinator plus a compound class selector: elements with both
// "item" and "active" classes anywhere inside an element with class "menu".
List<HtmlElement> activeMenuItems = page.querySelectorAll(".menu .item.active");
// :first-child applied at two levels: a first-child <td> inside a first-child <tr> of a table.
HtmlElement firstTableCell = page.querySelector("table tr:first-child td:first-child");
// Attribute selector combined with the :checked pseudo-class.
List<HtmlElement> checkedInputs = page.querySelectorAll("input[type='checkbox']:checked");
// Comma-separated selector list: matches either a submit <input> or a submit <button> inside a form.
HtmlElement submitButton = page.querySelector("form input[type='submit'], form button[type='submit']");

Install with Tessl CLI

npx tessl i tessl/maven-net-sourceforge-htmlunit--htmlunit

docs

browser-automation.md

cookies.md

forms.md

html-dom.md

http.md

index.md

javascript.md

windows.md

tile.json