Java HTML parser library implementing the WHATWG HTML5 specification for parsing, manipulating, and sanitizing HTML and XML documents.
—
HTML form processing with automatic form control discovery and submission capabilities through the HTTP connection system. jsoup provides comprehensive support for working with HTML forms, extracting form data, and submitting forms programmatically.
FormElement extends Element and provides specialized functionality for HTML forms.
/**
* Get all form control elements within this form.
* @return Elements collection of form controls (input, select, textarea, button)
*/
public Elements elements();
/**
* Add a form control element to this form.
* @param element form control element to add
* @return this FormElement for chaining
*/
public FormElement addElement(Element element);
/**
* Prepare a Connection for submitting this form.
* @return Connection configured with form action, method, and data
*/
public Connection submit();
/**
* Get form data as key-value pairs.
* @return List of Connection.KeyVal pairs representing form data
*/
public List<Connection.KeyVal> formData();
Usage Examples:
import java.util.List;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.FormElement;
import org.jsoup.select.Elements;
Document doc = Jsoup.parse("""
<form action="/submit" method="post">
<input type="text" name="username" value="john">
<input type="password" name="password" value="secret">
<input type="hidden" name="token" value="abc123">
<select name="country">
<option value="us" selected>United States</option>
<option value="ca">Canada</option>
</select>
<textarea name="comments">User feedback</textarea>
<input type="submit" value="Submit">
</form>
""");
FormElement form = (FormElement) doc.selectFirst("form");
// Get form controls
Elements controls = form.elements();
System.out.println("Form has " + controls.size() + " controls");
// Get form data
List<Connection.KeyVal> data = form.formData();
for (Connection.KeyVal pair : data) {
System.out.println(pair.key() + " = " + pair.value());
}
Locate forms within documents using CSS selectors or specialized methods.
/**
* Find all forms among the selected elements.
* @return List of the FormElement objects contained in the selection
*/
public List<FormElement> forms(); // Available on Elements collections
Usage Examples:
Document doc = Jsoup.connect("https://example.com/login").get();
// Find forms using selectors
Elements allForms = doc.select("form");
FormElement loginForm = (FormElement) doc.selectFirst("form#login-form");
FormElement firstForm = (FormElement) doc.selectFirst("form");
// Find forms in specific containers. forms() returns a List<FormElement> holding
// only those matched elements that are forms, so the selector must match the
// <form> elements themselves rather than their container.
List<FormElement> containerForms = doc.select("#main-content form").forms();
// Cast Element to FormElement
Element formElement = doc.selectFirst("form");
if (formElement instanceof FormElement) {
FormElement form = (FormElement) formElement;
// Use form-specific methods
}
Extract form data values from various input types.
// Form control value extraction
String textValue = textInput.val(); // Text input value
// Element.val() reads the "value" attribute (or the text of a textarea), so on a
// <select> it does not report the chosen option; query the selected <option> instead.
String selectedOption = select.select("option[selected]").val(); // Selected option value ("" if none is selected)
String textareaContent = textarea.val(); // Textarea content
boolean isChecked = checkbox.hasAttr("checked"); // Checkbox state
String radioValue = radioButton.val(); // Radio button value
Usage Examples:
FormElement form = (FormElement) doc.selectFirst("form");
Elements controls = form.elements();
// Report each control's value according to its declared input type.
// Controls whose type is not listed below are not printed.
for (Element field : controls) {
    String fieldName = field.attr("name");
    String fieldValue = field.val();
    switch (field.attr("type")) {
        case "text", "email", "password" ->
            System.out.println(fieldName + " (text): " + fieldValue);
        case "checkbox" -> {
            // An unchecked box is reported with a placeholder instead of its value
            String shown = field.hasAttr("checked") ? fieldValue : "unchecked";
            System.out.println(fieldName + " (checkbox): " + shown);
        }
        case "radio" -> {
            // Only the checked radio button is reported
            if (field.hasAttr("checked")) {
                System.out.println(fieldName + " (radio): " + fieldValue);
            }
        }
        case "hidden" ->
            System.out.println(fieldName + " (hidden): " + fieldValue);
    }
}
// Report the chosen option of every <select>; an empty string means no
// option carries the "selected" attribute.
Elements selects = form.select("select");
for (Element dropdown : selects) {
    Element chosen = dropdown.selectFirst("option[selected]");
    String chosenValue;
    if (chosen == null) {
        chosenValue = "";
    } else {
        chosenValue = chosen.val();
    }
    System.out.println(dropdown.attr("name") + " (select): " + chosenValue);
}
// Handle textareas
Elements textareas = form.select("textarea");
for (Element textarea : textareas) {
String name = textarea.attr("name");
String content = textarea.text(); // For a textarea, text() and val() both return its content
System.out.println(name + " (textarea): " + content);
}
Submit forms programmatically with automatic data collection and HTTP configuration.
/**
* Prepare Connection for form submission with automatic configuration.
* - Sets HTTP method from form method attribute (GET/POST)
* - Sets URL from form action attribute
* - Includes all form control data
* @return Connection ready for execution
*/
public Connection submit();
Usage Examples:
// Basic form submission
FormElement form = (FormElement) doc.selectFirst("form");
Connection.Response response = form.submit().execute();
if (response.statusCode() == 200) {
Document resultPage = response.parse();
System.out.println("Form submitted successfully");
} else {
System.out.println("Form submission failed: " + response.statusCode());
}
// Modify form data before submission
Connection conn = form.submit()
.data("additional_field", "extra_value") // Add extra data
.cookie("session", "session_token") // Add authentication
.userAgent("MyBot/1.0"); // Set user agent
Document result = conn.post();
// Override form values. Connection.data(key, value) appends a parameter, so
// calling it for a field the form already supplies would send that field twice.
// Update the existing KeyVal instead (data(key) returns null if the form has
// no field with that name).
conn = form.submit();
conn.data("username").value("different_user"); // Override existing field
conn.data("password").value("new_password"); // Override existing field
Document loginResult = conn.post();
Handle complex form workflows with sessions and state management.
// No specific API - use Connection sessions with forms
Usage Examples:
// Login workflow with session management
Connection session = Jsoup.newSession()
.userAgent("Mozilla/5.0")
.timeout(10000);
// Step 1: Get login form
Document loginPage = session.newRequest()
.url("https://example.com/login")
.get();
FormElement loginForm = (FormElement) loginPage.selectFirst("form#login");
// Step 2: Submit login form
// jsoup follows redirects by default, in which case the response would carry the
// status of the final page rather than the 302; turn that off so the redirect
// issued by the login handler can be checked below.
Connection.Response loginResponse = loginForm.submit()
    .data("username", "myuser")
    .data("password", "mypass")
    .followRedirects(false)
    .execute();
if (loginResponse.statusCode() == 302) { // Redirect after login
System.out.println("Login successful");
// Step 3: Access protected form (session maintains cookies)
Document protectedPage = session.newRequest()
.url("https://example.com/profile")
.get();
FormElement profileForm = (FormElement) protectedPage.selectFirst("form#profile");
// Step 4: Submit profile update
Document result = profileForm.submit()
.data("email", "new@example.com")
.data("name", "New Name")
.post();
}
Handle forms with file upload capabilities.
/**
* Add file upload data to form submission.
* @param key form field name
* @param filename filename for upload
* @param inputStream file content stream
* @return Connection for chaining
*/
public Connection data(String key, String filename, InputStream inputStream);
Usage Examples:
FormElement uploadForm = (FormElement) doc.selectFirst("form[enctype='multipart/form-data']");
// Prepare file upload. try-with-resources closes the stream even when the
// request throws, so no explicit close() call is needed afterwards.
try (FileInputStream fileStream = new FileInputStream("document.pdf")) {
    Connection.Response response = uploadForm.submit()
        .data("file", "document.pdf", fileStream)
        .data("description", "Important document")
        .execute();
    // Handle upload response
    if (response.statusCode() == 200) {
        System.out.println("File uploaded successfully");
    } else {
        System.out.println("Upload failed: " + response.statusMessage());
    }
}
Modify forms before submission by changing values, adding fields, or updating attributes.
Usage Examples:
FormElement form = (FormElement) doc.selectFirst("form");
// Modify existing field values
Element usernameField = form.selectFirst("input[name=username]");
usernameField.val("new_username");
// Add new hidden fields
Element csrfToken = doc.createElement("input");
csrfToken.attr("type", "hidden")
.attr("name", "csrf_token")
.attr("value", "generated_token");
form.appendChild(csrfToken);
// Modify form attributes
form.attr("action", "/new-endpoint");
// submit() issues a POST only when the method attribute is "post" (case-insensitive);
// any other value, including "put", is submitted as a GET. To send another verb,
// set it on the Connection returned by submit() instead.
form.attr("method", "post");
// Submit modified form
Document result = form.submit().execute().parse();
Extract and validate form data before processing.
Usage Examples:
public class FormProcessor {
/**
 * Flatten the form's data pairs into a name-to-value map.
 * When a field name occurs more than once, the last value replaces earlier ones.
 *
 * @param form form whose data is read
 * @return map from field name to field value
 */
public Map<String, String> extractFormData(FormElement form) {
    Map<String, String> valuesByName = new HashMap<>();
    form.formData().forEach(entry -> valuesByName.put(entry.key(), entry.value()));
    return valuesByName;
}
/**
 * Check that the form carries non-blank username, email, and password
 * fields and that the email value contains an "@".
 * The first problem found is written to standard error.
 *
 * @param form form to check
 * @return true if every check passes, false otherwise
 */
public boolean validateForm(FormElement form) {
    Map<String, String> values = extractFormData(form);

    // Every required field must be present and contain non-whitespace text
    for (String required : new String[] {"username", "email", "password"}) {
        boolean filled = values.containsKey(required)
            && !values.get(required).trim().isEmpty();
        if (!filled) {
            System.err.println("Required field missing: " + required);
            return false;
        }
    }

    // Minimal email sanity check: the value must contain an "@"
    String address = values.get("email");
    boolean emailAcceptable = address == null || address.contains("@");
    if (!emailAcceptable) {
        System.err.println("Invalid email format");
        return false;
    }
    return true;
}
/**
 * Validate the form, apply any overrides, submit it, and parse the response.
 *
 * @param form form to validate and submit
 * @param overrides field values that replace the form's own values; may be null
 * @return the parsed response document
 * @throws IllegalArgumentException if validateForm rejects the form
 * @throws RuntimeException wrapping the IOException raised when the request
 *         fails or the response status is 400 or above
 */
public Document submitFormSafely(FormElement form, Map<String, String> overrides) {
    try {
        if (!validateForm(form)) {
            throw new IllegalArgumentException("Form validation failed");
        }
        Connection conn = form.submit();
        // Apply overrides. Connection.data(key, value) appends a parameter, so a
        // field the form already supplies is updated in place; appending would
        // send both the original and the override value.
        if (overrides != null) {
            for (Map.Entry<String, String> entry : overrides.entrySet()) {
                Connection.KeyVal existing = conn.data(entry.getKey());
                if (existing != null) {
                    existing.value(entry.getValue());
                } else {
                    conn.data(entry.getKey(), entry.getValue());
                }
            }
        }
        Connection.Response response = conn.execute();
        // NOTE(review): with jsoup's default ignoreHttpErrors(false), execute() is
        // expected to throw on error statuses already; this check stays as a guard
        // for connections configured to ignore HTTP errors.
        if (response.statusCode() >= 400) {
            throw new IOException("Form submission failed: " + response.statusCode());
        }
        return response.parse();
    } catch (IOException e) {
        System.err.println("Form submission error: " + e.getMessage());
        throw new RuntimeException(e);
    }
}
}
Handle different types of form controls and their data extraction patterns.
// Key-Value interface for form data
public interface Connection.KeyVal {
String key(); // Form field name
String value(); // Form field value
InputStream inputStream(); // For file uploads
boolean hasInputStream(); // Check if file upload
}
Usage Examples:
FormElement form = (FormElement) doc.selectFirst("form");
List<Connection.KeyVal> allData = form.formData();
for (Connection.KeyVal item : allData) {
String fieldName = item.key();
String fieldValue = item.value();
if (item.hasInputStream()) {
System.out.println(fieldName + " is a file upload");
// Handle file upload data
} else {
System.out.println(fieldName + " = " + fieldValue);
}
}
// Filter specific field types
List<Connection.KeyVal> textFields = allData.stream()
.filter(kv -> !kv.hasInputStream())
.collect(Collectors.toList());
List<Connection.KeyVal> fileFields = allData.stream()
.filter(Connection.KeyVal::hasInputStream)
.collect(Collectors.toList());
Handle common form processing errors and debug form submissions.
Usage Examples:
public class FormSubmissionHandler {
/**
 * Submit the form, retrying after timeouts, I/O errors, and HTTP error statuses.
 * The wait between attempts grows linearly: one second after the first
 * failure, two after the second, and so on.
 *
 * @param form form to submit
 * @param maxRetries maximum number of submission attempts; if it is zero or
 *        negative no request is made and the method throws immediately
 * @return the parsed document of the first response with a status below 400
 * @throws RuntimeException if the last attempt fails, or if the thread is
 *         interrupted while waiting between attempts
 */
public Document submitFormWithRetry(FormElement form, int maxRetries) {
    int attempts = 0;
    while (attempts < maxRetries) {
        try {
            attempts++;
            Connection conn = form.submit()
                .timeout(30000)
                .ignoreHttpErrors(false);
            Connection.Response response = conn.execute();
            // Log submission details
            System.out.println("Form submission attempt " + attempts);
            System.out.println("Status: " + response.statusCode());
            System.out.println("Content-Type: " + response.contentType());
            if (response.statusCode() < 400) {
                return response.parse();
            } else {
                System.err.println("HTTP error: " + response.statusMessage());
                if (attempts >= maxRetries) {
                    // Caught by the IOException handler below and rethrown wrapped
                    throw new IOException("Max retries exceeded");
                }
            }
        } catch (SocketTimeoutException e) {
            System.err.println("Timeout on attempt " + attempts);
            if (attempts >= maxRetries) {
                throw new RuntimeException("Form submission timed out", e);
            }
        } catch (IOException e) {
            System.err.println("IO error on attempt " + attempts + ": " + e.getMessage());
            if (attempts >= maxRetries) {
                throw new RuntimeException("Form submission failed", e);
            }
        }
        // Wait before retrying
        try {
            Thread.sleep(1000L * attempts); // Linear backoff: 1s, 2s, 3s, ...
        } catch (InterruptedException ie) {
            // Restore the interrupt flag and report the interruption as such,
            // rather than as an exhausted retry budget.
            Thread.currentThread().interrupt();
            throw new RuntimeException(
                "Form submission interrupted after " + attempts + " attempts", ie);
        }
    }
    throw new RuntimeException("Form submission failed after " + maxRetries + " attempts");
}
/**
 * Print the form's action, method, and enctype attributes, then every
 * control (tag, type, name, value), then every data pair, to standard output.
 *
 * @param form form to describe
 */
public void debugFormData(FormElement form) {
    System.out.println("=== Form Debug Information ===");
    System.out.println("Action: " + form.attr("action"));
    System.out.println("Method: " + form.attr("method"));
    System.out.println("Encoding: " + form.attr("enctype"));

    // One line per form control
    Elements formControls = form.elements();
    System.out.println("Form controls (" + formControls.size() + "):");
    formControls.forEach(ctl -> System.out.printf(" %s[type=%s, name=%s] = %s%n",
        ctl.tagName(), ctl.attr("type"), ctl.attr("name"), ctl.val()));

    // One line per key/value pair returned by formData()
    List<Connection.KeyVal> pairs = form.formData();
    System.out.println("Form data (" + pairs.size() + " pairs):");
    pairs.forEach(kv -> System.out.printf(" %s = %s%n", kv.key(), kv.value()));
}
}
This comprehensive form handling system enables robust programmatic interaction with HTML forms, supporting everything from simple contact forms to complex multi-step workflows with file uploads and authentication.
Install with Tessl CLI
npx tessl i tessl/maven-org-jsoup--jsoup