or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

cookies.md exceptions.md forms.md http.md index.md javascript.md page-dom.md web-client.md windows.md
tile.json

tessl/maven-org-htmlunit--htmlunit

A headless browser for Java programs that provides web automation, form handling, JavaScript execution, and DOM manipulation capabilities.

Workspace
tessl
Visibility
Public
Created
Last updated
Describes
mavenpkg:maven/org.htmlunit/htmlunit@4.17.x

To install, run

npx @tessl/cli install tessl/maven-org-htmlunit--htmlunit@4.17.0

docs/index.md

HtmlUnit

HtmlUnit is a headless web browser library for Java that models HTML documents and provides an API for programmatic web interaction. It enables form submission, link clicking, JavaScript execution, and DOM manipulation - simulating user browser behavior for automated testing and web scraping.

Package Information

  • Package Name: htmlunit
  • Package Type: maven
  • Language: Java
  • Installation: See below for Maven and Gradle

Maven

Add to your pom.xml:

<dependency>
    <groupId>org.htmlunit</groupId>
    <artifactId>htmlunit</artifactId>
    <version>4.17.0</version>
</dependency>

Gradle

Add to your build.gradle:

implementation 'org.htmlunit:htmlunit:4.17.0'

Core Imports

import org.htmlunit.WebClient;
import org.htmlunit.html.HtmlPage;
import org.htmlunit.BrowserVersion;

For form handling:

import org.htmlunit.html.HtmlForm;
import org.htmlunit.html.HtmlTextInput;
import org.htmlunit.html.HtmlSubmitInput;
import org.htmlunit.html.HtmlSelect;

For HTTP requests:

import org.htmlunit.WebRequest;
import org.htmlunit.WebResponse;
import org.htmlunit.HttpMethod;

For JavaScript handling:

import org.htmlunit.AlertHandler;
import org.htmlunit.ConfirmHandler;
import org.htmlunit.JavaScriptErrorListener;

For cookie management:

import org.htmlunit.CookieManager;
import org.htmlunit.util.Cookie;

Basic Usage

import org.htmlunit.WebClient;
import org.htmlunit.html.HtmlPage;
import org.htmlunit.html.HtmlForm;
import org.htmlunit.html.HtmlTextInput;
import org.htmlunit.BrowserVersion;

// Create web client
try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
    // Configure options
    webClient.getOptions().setJavaScriptEnabled(true);
    webClient.getOptions().setCssEnabled(false);
    webClient.getOptions().setThrowExceptionOnScriptError(false);
    
    // Navigate to page
    HtmlPage page = webClient.getPage("http://example.com");
    System.out.println("Page title: " + page.getTitleText());
    
    // Find and fill form
    HtmlForm form = page.getFormByName("loginForm");
    HtmlTextInput username = form.getInputByName("username");
    username.setValue("myuser");
    
    // Submit form
    HtmlPage result = form.getInputByValue("Login").click();
    System.out.println("Result: " + result.asNormalizedText());
}

Architecture

HtmlUnit is built around several key components:

  • WebClient: Main entry point managing browser configuration, cookie handling, and page navigation
  • Page Hierarchy: Type-safe page representations (HtmlPage, TextPage, UnexpectedPage) with full DOM access
  • HTML Elements: Complete DOM element model with interactive capabilities (forms, links, inputs)
  • JavaScript Engine: Integrated Rhino-based JavaScript execution with browser API simulation
  • HTTP Layer: Customizable HTTP connection handling with request/response processing
  • Browser Simulation: Accurate browser version emulation including user agents and feature support

Capabilities

Web Client Management

Core browser functionality including client configuration, page navigation, window management, and resource cleanup. Essential for all web automation tasks.

public class WebClient implements AutoCloseable {
    public WebClient();
    public WebClient(BrowserVersion browserVersion);
    public <P extends Page> P getPage(String url) throws IOException;
    public <P extends Page> P getPage(URL url) throws IOException; 
    public <P extends Page> P getPage(WebRequest request) throws IOException;
    public void close();
    public WebClientOptions getOptions();
    public BrowserVersion getBrowserVersion();
}

public class WebClientOptions {
    public void setJavaScriptEnabled(boolean enabled);
    public boolean isJavaScriptEnabled();
    public void setCssEnabled(boolean enabled);
    public void setThrowExceptionOnScriptError(boolean throwException);
    public void setTimeout(int timeout);
}

Web Client

Page and DOM Interaction

HTML page representation and DOM manipulation capabilities including element selection, content extraction, and page structure navigation.

public class HtmlPage extends SgmlPage {
    public DomElement getElementById(String id);
    public DomNodeList<HtmlElement> getElementsByTagName(String name);
    public String getTitleText();
    public String asNormalizedText();
    public List<HtmlForm> getForms();
    public List<HtmlAnchor> getAnchors();
}

public abstract class HtmlElement extends DomElement {
    public <P extends Page> P click() throws IOException;
    public String getAttribute(String name);
    public void setAttribute(String name, String value);
    public String getId();
    public void focus();
}

Page and DOM

Form Handling

Comprehensive form interaction including input field manipulation, form submission, and all HTML form element types (text, password, checkbox, radio, select).

public class HtmlForm extends HtmlElement {
    public <P extends Page> P submit() throws IOException;
    public <P extends Page> P submit(SubmittableElement submitElement) throws IOException;
    public <I extends HtmlInput> I getInputByName(String name);
    public List<HtmlInput> getInputsByName(String name);
    public HtmlTextArea getTextAreaByName(String name);
    public HtmlSelect getSelectByName(String name);
}

public abstract class HtmlInput extends HtmlElement {
    public String getValue();
    public void setValue(String value);
    public String getName();
    public String getType();
}

Form Handling

HTTP Request and Response

HTTP communication layer providing request customization, response processing, header management, and connection configuration.

public class WebRequest {
    public WebRequest(URL url);
    public WebRequest(URL url, HttpMethod method);
    public URL getUrl();
    public HttpMethod getHttpMethod();
    public void setRequestBody(String body);
    public void setAdditionalHeader(String name, String value);
    public List<NameValuePair> getRequestParameters();
}

public class WebResponse {
    public int getStatusCode();
    public String getStatusMessage();
    public String getContentAsString();
    public String getContentType();
    public List<NameValuePair> getResponseHeaders();
}

HTTP Handling

JavaScript Integration

JavaScript engine configuration and event handling including script execution control, error handling, and browser API simulation.

public interface AlertHandler {
    void handleAlert(Page page, String message);
}

public interface ConfirmHandler {
    boolean handleConfirm(Page page, String message);
}

public interface JavaScriptErrorListener {
    void scriptException(HtmlPage page, ScriptException scriptException);
    void timeoutError(HtmlPage page, long allowedTime, long executionTime);
}

JavaScript

Cookie Management

Cookie handling and session management including cookie creation, retrieval, and automatic cookie processing for session maintenance.

public class CookieManager {
    public void setCookiesEnabled(boolean enabled);
    public boolean isCookiesEnabled();
    public Set<Cookie> getCookies();
    public void addCookie(Cookie cookie);
    public void removeCookie(Cookie cookie);
    public void clearExpired(Date date);
}

public class Cookie {
    public Cookie(String domain, String name, String value);
    public String getName();
    public String getValue();
    public String getDomain();
    public String getPath();
    public Date getExpires();
    public boolean isSecure();
    public boolean isHttpOnly();
}

Cookie Management

Window Management

Browser window and frame management including multiple window handling, window navigation, and frame interactions.

public interface WebWindow {
    public Page getEnclosedPage();
    public void setEnclosedPage(Page page);
    public String getName();
    public WebWindow getParentWindow();
    public WebWindow getTopWindow();
    public WebClient getWebClient();
}

public class TopLevelWindow implements WebWindow {
    // Main browser windows
}

public class FrameWindow implements WebWindow {
    // Frame and iframe windows
}

Window Management

Exception Handling

Error handling and exception management for HTTP errors, JavaScript errors, and element access failures.

public class FailingHttpStatusCodeException extends RuntimeException {
    public int getStatusCode();
    public String getStatusMessage();
    public WebResponse getResponse();
}

public class ElementNotFoundException extends RuntimeException {
    // Thrown when elements cannot be found
}

public class ScriptException extends RuntimeException {
    // JavaScript execution errors
}

Exception Handling

Types

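public final class BrowserVersion {
    // Predefined browser versions are static constant instances, not enum values
    public static final BrowserVersion CHROME;
    public static final BrowserVersion FIREFOX;
    public static final BrowserVersion FIREFOX_ESR;
    public static final BrowserVersion EDGE;
    public static final BrowserVersion BEST_SUPPORTED;
    
    public boolean isChrome();
    public boolean isFirefox();
    public String getUserAgent();
}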

public enum HttpMethod {
    GET, POST, PUT, DELETE, HEAD, OPTIONS, TRACE, PATCH
}

public interface Page {
    void initialize();
    void cleanUp();  
    WebResponse getWebResponse();
    URL getUrl();
    boolean isHtmlPage();
}

public class NameValuePair {
    public NameValuePair(String name, String value);
    public String getName();
    public String getValue();
}

public interface DomNodeList<T extends DomNode> extends List<T> {
    // Specialized list interface for DOM nodes
    // Implements all List methods for accessing DOM elements
}

public interface WebWindow {
    Page getEnclosedPage();
    void setEnclosedPage(Page page);
    String getName();
    WebWindow getParentWindow();
    WebWindow getTopWindow();
    WebClient getWebClient();
}

public class Cookie {
    public Cookie(String domain, String name, String value);
    public String getName();
    public String getValue();
    public String getDomain();
    public String getPath();
    public Date getExpires();
    public boolean isSecure();
    public boolean isHttpOnly();
}

public class CookieManager {
    public void setCookiesEnabled(boolean enabled);
    public boolean isCookiesEnabled();
    public Set<Cookie> getCookies();
    public void addCookie(Cookie cookie);
    public void removeCookie(Cookie cookie);
    public void clearExpired(Date date);
}

public class FailingHttpStatusCodeException extends RuntimeException {
    public int getStatusCode();
    public String getStatusMessage();
    public WebResponse getResponse();
}

public class ScriptException extends RuntimeException {
    // JavaScript execution errors with detailed error information
}