A headless browser for Java programs that provides web automation, form handling, JavaScript execution, and DOM manipulation capabilities.
npx @tessl/cli install tessl/maven-org-htmlunit--htmlunit@4.17.0
HtmlUnit is a headless web browser library for Java that models HTML documents and provides an API for programmatic web interaction. It enables form submission, link clicking, JavaScript execution, and DOM manipulation - simulating user browser behavior for automated testing and web scraping.
Add to your pom.xml:
<dependency>
<groupId>org.htmlunit</groupId>
<artifactId>htmlunit</artifactId>
<version>4.17.0</version>
</dependency>
Add to your build.gradle:
implementation 'org.htmlunit:htmlunit:4.17.0'
import org.htmlunit.WebClient;
import org.htmlunit.html.HtmlPage;
import org.htmlunit.BrowserVersion;
For form handling:
import org.htmlunit.html.HtmlForm;
import org.htmlunit.html.HtmlTextInput;
import org.htmlunit.html.HtmlSubmitInput;
import org.htmlunit.html.HtmlSelect;
For HTTP requests:
import org.htmlunit.WebRequest;
import org.htmlunit.WebResponse;
import org.htmlunit.HttpMethod;
For JavaScript handling:
import org.htmlunit.AlertHandler;
import org.htmlunit.ConfirmHandler;
import org.htmlunit.JavaScriptErrorListener;
For cookie management:
import org.htmlunit.CookieManager;
import org.htmlunit.util.Cookie;
import org.htmlunit.WebClient;
import org.htmlunit.html.HtmlPage;
import org.htmlunit.html.HtmlForm;
import org.htmlunit.html.HtmlTextInput;
import org.htmlunit.BrowserVersion;
// Create web client
try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
// Configure options
webClient.getOptions().setJavaScriptEnabled(true);
webClient.getOptions().setCssEnabled(false);
webClient.getOptions().setThrowExceptionOnScriptError(false);
// Navigate to page
HtmlPage page = webClient.getPage("http://example.com");
System.out.println("Page title: " + page.getTitleText());
// Find and fill form
HtmlForm form = page.getFormByName("loginForm");
HtmlTextInput username = form.getInputByName("username");
username.setValue("myuser");
// Submit form
HtmlPage result = form.getInputByValue("Login").click();
System.out.println("Result: " + result.asNormalizedText());
}
HtmlUnit is built around several key components:
Core browser functionality including client configuration, page navigation, window management, and resource cleanup. Essential for all web automation tasks.
public class WebClient implements AutoCloseable {
public WebClient();
public WebClient(BrowserVersion browserVersion);
public <P extends Page> P getPage(String url) throws IOException;
public <P extends Page> P getPage(URL url) throws IOException;
public <P extends Page> P getPage(WebRequest request) throws IOException;
public void close();
public WebClientOptions getOptions();
public BrowserVersion getBrowserVersion();
}
public class WebClientOptions {
public void setJavaScriptEnabled(boolean enabled);
public boolean isJavaScriptEnabled();
public void setCssEnabled(boolean enabled);
public void setThrowExceptionOnScriptError(boolean throwException);
public void setTimeout(int timeout);
}
HTML page representation and DOM manipulation capabilities including element selection, content extraction, and page structure navigation.
public class HtmlPage extends SgmlPage {
public DomElement getElementById(String id);
public DomNodeList<HtmlElement> getElementsByTagName(String name);
public String getTitleText();
public String asNormalizedText();
public List<HtmlForm> getForms();
public List<HtmlAnchor> getAnchors();
}
public abstract class HtmlElement extends DomElement {
public <P extends Page> P click() throws IOException;
public String getAttribute(String name);
public void setAttribute(String name, String value);
public String getId();
public void focus();
}
Comprehensive form interaction including input field manipulation, form submission, and all HTML form element types (text, password, checkbox, radio, select).
public class HtmlForm extends HtmlElement {
public <P extends Page> P submit() throws IOException;
public <P extends Page> P submit(SubmittableElement submitElement) throws IOException;
public <I extends HtmlInput> I getInputByName(String name);
public List<HtmlInput> getInputsByName(String name);
public HtmlTextArea getTextAreaByName(String name);
public HtmlSelect getSelectByName(String name);
}
public abstract class HtmlInput extends HtmlElement {
public String getValue();
public void setValue(String value);
public String getName();
public String getType();
}
HTTP communication layer providing request customization, response processing, header management, and connection configuration.
public class WebRequest {
public WebRequest(URL url);
public WebRequest(URL url, HttpMethod method);
public URL getUrl();
public HttpMethod getHttpMethod();
public void setRequestBody(String body);
public void setAdditionalHeader(String name, String value);
public List<NameValuePair> getRequestParameters();
}
public class WebResponse {
public int getStatusCode();
public String getStatusMessage();
public String getContentAsString();
public String getContentType();
public List<NameValuePair> getResponseHeaders();
}
JavaScript engine configuration and event handling including script execution control, error handling, and browser API simulation.
public interface AlertHandler {
void handleAlert(Page page, String message);
}
public interface ConfirmHandler {
boolean handleConfirm(Page page, String message);
}
public interface JavaScriptErrorListener {
void scriptException(HtmlPage page, ScriptException scriptException);
void timeoutError(HtmlPage page, long allowedTime, long executionTime);
}
Cookie handling and session management including cookie creation, retrieval, and automatic cookie processing for session maintenance.
public class CookieManager {
public void setCookiesEnabled(boolean enabled);
public boolean isCookiesEnabled();
public Set<Cookie> getCookies();
public void addCookie(Cookie cookie);
public void removeCookie(Cookie cookie);
public void clearExpired(Date date);
}
public class Cookie {
public Cookie(String domain, String name, String value);
public String getName();
public String getValue();
public String getDomain();
public String getPath();
public Date getExpires();
public boolean isSecure();
public boolean isHttpOnly();
}
Browser window and frame management including multiple window handling, window navigation, and frame interactions.
public interface WebWindow {
public Page getEnclosedPage();
public void setEnclosedPage(Page page);
public String getName();
public WebWindow getParentWindow();
public WebWindow getTopWindow();
public WebClient getWebClient();
}
public class TopLevelWindow implements WebWindow {
// Main browser windows
}
public class FrameWindow implements WebWindow {
// Frame and iframe windows
}
Error handling and exception management for HTTP errors, JavaScript errors, and element access failures.
public class FailingHttpStatusCodeException extends RuntimeException {
public int getStatusCode();
public String getStatusMessage();
public WebResponse getResponse();
}
public class ElementNotFoundException extends RuntimeException {
// Thrown when elements cannot be found
}
public class ScriptException extends RuntimeException {
// JavaScript execution errors
}
public enum BrowserVersion {
CHROME, FIREFOX, FIREFOX_ESR, EDGE, BEST_SUPPORTED;
public boolean isChrome();
public boolean isFirefox();
public String getUserAgent();
}
public enum HttpMethod {
GET, POST, PUT, DELETE, HEAD, OPTIONS, TRACE, PATCH
}
public interface Page {
void initialize();
void cleanUp();
WebResponse getWebResponse();
URL getUrl();
boolean isHtmlPage();
}
public class NameValuePair {
public NameValuePair(String name, String value);
public String getName();
public String getValue();
}
public interface DomNodeList<T extends DomNode> extends List<T> {
// Specialized list interface for DOM nodes
// Implements all List methods for accessing DOM elements
}
public interface WebWindow {
Page getEnclosedPage();
void setEnclosedPage(Page page);
String getName();
WebWindow getParentWindow();
WebWindow getTopWindow();
WebClient getWebClient();
}
public class Cookie {
public Cookie(String domain, String name, String value);
public String getName();
public String getValue();
public String getDomain();
public String getPath();
public Date getExpires();
public boolean isSecure();
public boolean isHttpOnly();
}
public class CookieManager {
public void setCookiesEnabled(boolean enabled);
public boolean isCookiesEnabled();
public Set<Cookie> getCookies();
public void addCookie(Cookie cookie);
public void removeCookie(Cookie cookie);
public void clearExpired(Date date);
}
public class FailingHttpStatusCodeException extends RuntimeException {
public int getStatusCode();
public String getStatusMessage();
public WebResponse getResponse();
}
public class ScriptException extends RuntimeException {
// JavaScript execution errors with detailed error information
}