A headless browser intended for use in testing web-based applications.
npx @tessl/cli install tessl/maven-net-sourceforge-htmlunit--htmlunit@2.70.0

HtmlUnit is a comprehensive headless browser library for Java that enables automated testing and web scraping of web-based applications. It provides a pure Java implementation of a web browser with full HTML, CSS, and JavaScript support, including advanced features like form submission, cookie management, SSL certificate handling, and proxy configuration.
pom.xml:
<dependency>
  <groupId>net.sourceforge.htmlunit</groupId>
  <artifactId>htmlunit</artifactId>
  <version>2.70.0</version>
</dependency>

import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebRequest;
import com.gargoylesoftware.htmlunit.WebResponse;

import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlTextInput;
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;
// Create a web client instance
try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
// Configure options
webClient.getOptions().setJavaScriptEnabled(true);
webClient.getOptions().setCssEnabled(false);
// Load a web page
HtmlPage page = webClient.getPage("http://example.com");
// Find and interact with form elements
HtmlForm form = page.getFormByName("loginForm");
HtmlTextInput usernameField = form.getInputByName("username");
usernameField.setValueAttribute("user123");
HtmlSubmitInput submitButton = form.getFirstByXPath(".//input[@type='submit']");
HtmlPage resultPage = submitButton.click();
// Extract page content
String pageTitle = resultPage.getTitleText();
String pageText = resultPage.asText();
} // WebClient implements AutoCloseable

HtmlUnit is built around several key components:
Core browser automation functionality for loading pages, managing windows, and configuring browser behavior. Essential for web scraping and automated testing.
public class WebClient implements AutoCloseable {
public WebClient();
public WebClient(BrowserVersion browserVersion);
public WebClient(BrowserVersion browserVersion, String proxyHost, int proxyPort);
public <P extends Page> P getPage(String url) throws IOException, FailingHttpStatusCodeException;
public <P extends Page> P getPage(URL url) throws IOException, FailingHttpStatusCodeException;
public <P extends Page> P getPage(WebRequest request) throws IOException, FailingHttpStatusCodeException;
public WebClientOptions getOptions();
public BrowserVersion getBrowserVersion();
public void close();
}

Comprehensive HTML DOM access and manipulation with CSS selectors, XPath queries, and element interaction. Perfect for form automation and content extraction.
public class HtmlPage extends SgmlPage {
public HtmlElement getElementById(String id);
public List<HtmlElement> getElementsByTagName(String tagName);
public List<HtmlElement> getElementsByName(String name);
public List<HtmlElement> getElementsByClassName(String className);
public HtmlElement querySelector(String selectors);
public List<HtmlElement> querySelectorAll(String selectors);
public DomNode getFirstByXPath(String xpathExpr);
public List<?> getByXPath(String xpathExpr);
public String asText();
public String asXml();
public String getTitleText();
}

Form automation capabilities including field input, selection handling, and form submission. Ideal for login automation and data entry workflows.
public class HtmlForm extends HtmlElement {
public <P extends Page> P submit() throws IOException;
public <P extends Page> P submit(SubmittableElement submitElement) throws IOException;
public void reset();
public HtmlElement getInputByName(String name);
public List<HtmlElement> getInputsByName(String name);
public HtmlTextInput getInputByValue(String value);
}
public abstract class HtmlInput extends HtmlElement implements SubmittableElement {
public String getValueAttribute();
public void setValueAttribute(String value);
public String getNameAttribute();
public boolean isDisabled();
public void setDisabled(boolean disabled);
}

HTTP request/response handling with full control over headers, methods, authentication, and connection settings. Essential for API testing and advanced web scraping.
public class WebRequest {
public WebRequest(URL url);
public WebRequest(URL url, HttpMethod submitMethod);
public URL getUrl();
public void setUrl(URL url);
public HttpMethod getHttpMethod();
public void setHttpMethod(HttpMethod method);
public String getRequestBody();
public void setRequestBody(String requestBody);
public void setAdditionalHeader(String name, String value);
public Map<String, String> getAdditionalHeaders();
}
public class WebResponse {
public int getStatusCode();
public String getStatusMessage();
public String getContentAsString();
public String getContentAsString(Charset charset);
public InputStream getContentAsStream();
public List<NameValuePair> getResponseHeaders();
public String getResponseHeaderValue(String headerName);
}

JavaScript engine integration for executing JavaScript code within web pages and handling browser API calls. Critical for modern web application automation.
public class HtmlPage extends SgmlPage {
public ScriptResult executeJavaScript(String sourceCode);
public ScriptResult executeJavaScript(String sourceCode, String sourceName, int startLine);
}
public class ScriptResult {
public Object getJavaScriptResult();
public Page getNewPage();
}
public interface JavaScriptErrorListener {
void scriptException(HtmlPage page, ScriptException scriptException);
void timeoutError(HtmlPage page, long allowedTime, long executionTime);
void malformedScriptURL(HtmlPage page, String url, MalformedURLException malformedURLException);
void loadScriptError(HtmlPage page, URL scriptUrl, Exception exception);
}

Browser window and frame management for handling pop-ups, iframes, and multi-window scenarios. Required for complex web application navigation.
public interface WebWindow {
public String getName();
public void setName(String name);
public Page getEnclosedPage();
public void setEnclosedPage(Page page);
public WebClient getWebClient();
public WebWindow getParentWindow();
public WebWindow getTopWindow();
public History getHistory();
public int getInnerHeight();
public int getInnerWidth();
}
public class TopLevelWindow extends WebWindowImpl {
// Implementation for top-level browser windows
}
public class DialogWindow extends WebWindowImpl {
// Implementation for modal dialog windows
}

HTTP cookie handling with domain scoping, expiration management, and security flags. Essential for session management and authentication workflows.
public class CookieManager {
public void addCookie(Cookie cookie);
public Set<Cookie> getCookies();
public Set<Cookie> getCookies(URL url);
public void clearCookies();
public boolean isCookiesEnabled();
public void setCookiesEnabled(boolean enabled);
}
public class Cookie {
public Cookie(String domain, String name, String value);
public Cookie(String domain, String name, String value, String path, Date expires, boolean secure);
public String getName();
public String getValue();
public String getDomain();
public String getPath();
public Date getExpires();
public boolean isSecure();
public boolean isHttpOnly();
}

public enum HttpMethod {
OPTIONS, GET, HEAD, POST, PUT, DELETE, TRACE, PATCH
}
public class BrowserVersion {
public static final BrowserVersion CHROME;
public static final BrowserVersion FIREFOX;
public static final BrowserVersion FIREFOX_ESR;
public static final BrowserVersion EDGE;
public static final BrowserVersion INTERNET_EXPLORER;
public static final BrowserVersion BEST_SUPPORTED;
public String getApplicationName();
public String getApplicationVersion();
public String getUserAgent();
public boolean hasFeature(BrowserFeature feature);
}
public class NameValuePair {
public NameValuePair(String name, String value);
public String getName();
public String getValue();
}
public class FailingHttpStatusCodeException extends RuntimeException {
public int getStatusCode();
public String getStatusMessage();
public WebResponse getResponse();
}
public class ElementNotFoundException extends RuntimeException {
// Thrown when element lookups fail
}