0
# Form Handling
1
2
HTML form processing with automatic form control discovery and submission capabilities through the HTTP connection system. jsoup provides comprehensive support for working with HTML forms, extracting form data, and submitting forms programmatically.
3
4
## Capabilities
5
6
### FormElement Class
7
8
FormElement extends Element and provides specialized functionality for HTML forms.
9
10
```java { .api }
11
/**
12
* Get all form control elements within this form.
13
* @return Elements collection of form controls (input, select, textarea, button)
14
*/
15
public Elements elements();
16
17
/**
18
* Add a form control element to this form.
19
* @param element form control element to add
20
* @return this FormElement for chaining
21
*/
22
public FormElement addElement(Element element);
23
24
/**
25
* Prepare a Connection for submitting this form.
26
* @return Connection configured with form action, method, and data
27
*/
28
public Connection submit();
29
30
/**
31
* Get form data as key-value pairs.
32
* @return List of Connection.KeyVal pairs representing form data
33
*/
34
public List<Connection.KeyVal> formData();
35
```
36
37
**Usage Examples:**
38
39
```java
40
import org.jsoup.Jsoup;
41
import org.jsoup.nodes.Document;
42
import org.jsoup.nodes.Element;
43
import org.jsoup.nodes.FormElement;
44
import org.jsoup.select.Elements;
45
46
Document doc = Jsoup.parse("""
47
<form action="/submit" method="post">
48
<input type="text" name="username" value="john">
49
<input type="password" name="password" value="secret">
50
<input type="hidden" name="token" value="abc123">
51
<select name="country">
52
<option value="us" selected>United States</option>
53
<option value="ca">Canada</option>
54
</select>
55
<textarea name="comments">User feedback</textarea>
56
<input type="submit" value="Submit">
57
</form>
58
""");
59
60
FormElement form = (FormElement) doc.selectFirst("form");
61
62
// Get form controls
63
Elements controls = form.elements();
64
System.out.println("Form has " + controls.size() + " controls");
65
66
// Get form data
67
List<Connection.KeyVal> data = form.formData();
68
for (Connection.KeyVal pair : data) {
69
System.out.println(pair.key() + " = " + pair.value());
70
}
71
```
72
73
### Finding Forms
74
75
Locate forms within documents using CSS selectors or specialized methods.
76
77
```java { .api }
78
/**
79
* Find all forms in the document.
80
* @return List of the FormElement objects found among the selected elements
81
*/
82
public List<FormElement> forms(); // Available on Elements collections
83
```
84
85
**Usage Examples:**
86
87
```java
88
Document doc = Jsoup.connect("https://example.com/login").get();
89
90
// Find forms using selectors
91
Elements allForms = doc.select("form");
92
FormElement loginForm = (FormElement) doc.selectFirst("form#login-form");
93
FormElement firstForm = (FormElement) doc.selectFirst("form");
94
95
// Find forms in specific containers
96
List<FormElement> containerForms = doc.select("#main-content form").forms();
97
98
// Cast Element to FormElement
99
Element formElement = doc.selectFirst("form");
100
if (formElement instanceof FormElement) {
101
FormElement form = (FormElement) formElement;
102
// Use form-specific methods
103
}
104
```
105
106
### Form Data Extraction
107
108
Extract form data values from various input types.
109
110
```java { .api }
111
// Form control value extraction
112
String textValue = textInput.val(); // Text input value
113
String selectedOption = select.select("option[selected]").val(); // Selected option value (select.val() reads only the select's own value attribute)
114
String textareaContent = textarea.val(); // Textarea content
115
boolean isChecked = checkbox.hasAttr("checked"); // Checkbox state
116
String radioValue = radioButton.val(); // Radio button value
117
```
118
119
**Usage Examples:**
120
121
```java
122
FormElement form = (FormElement) doc.selectFirst("form");
123
Elements controls = form.elements();
124
125
// Extract values by input type
126
for (Element control : controls) {
127
String name = control.attr("name");
128
String type = control.attr("type");
129
String value = control.val();
130
131
switch (type) {
132
case "text":
133
case "email":
134
case "password":
135
System.out.println(name + " (text): " + value);
136
break;
137
case "checkbox":
138
boolean checked = control.hasAttr("checked");
139
System.out.println(name + " (checkbox): " + (checked ? value : "unchecked"));
140
break;
141
case "radio":
142
if (control.hasAttr("checked")) {
143
System.out.println(name + " (radio): " + value);
144
}
145
break;
146
case "hidden":
147
System.out.println(name + " (hidden): " + value);
148
break;
149
}
150
}
151
152
// Handle select elements
153
Elements selects = form.select("select");
154
for (Element select : selects) {
155
String name = select.attr("name");
156
Element selectedOption = select.selectFirst("option[selected]");
157
String value = selectedOption != null ? selectedOption.val() : "";
158
System.out.println(name + " (select): " + value);
159
}
160
161
// Handle textareas
162
Elements textareas = form.select("textarea");
163
for (Element textarea : textareas) {
164
String name = textarea.attr("name");
165
String content = textarea.text(); // For a textarea, val() returns this same text content
166
System.out.println(name + " (textarea): " + content);
167
}
168
```
169
170
### Form Submission
171
172
Submit forms programmatically with automatic data collection and HTTP configuration.
173
174
```java { .api }
175
/**
176
* Prepare Connection for form submission with automatic configuration.
177
* - Sets HTTP method from form method attribute (GET/POST)
178
* - Sets URL from the form action attribute, resolved to an absolute URL (falls back to the document's base URI when no action is set)
179
* - Includes all form control data
180
* @return Connection ready for execution
181
*/
182
public Connection submit();
183
```
184
185
**Usage Examples:**
186
187
```java
188
// Basic form submission
189
FormElement form = (FormElement) doc.selectFirst("form");
190
Connection.Response response = form.submit().execute();
191
192
if (response.statusCode() == 200) {
193
Document resultPage = response.parse();
194
System.out.println("Form submitted successfully");
195
} else {
196
System.out.println("Form submission failed: " + response.statusCode());
197
}
198
199
// Modify form data before submission
200
Connection conn = form.submit()
201
.data("additional_field", "extra_value") // Add extra data
202
.cookie("session", "session_token") // Add authentication
203
.userAgent("MyBot/1.0"); // Set user agent
204
205
Document result = conn.post();
206
207
// Override form values
208
conn = form.submit();
209
conn.data("username").value("different_user"); // Replace the value collected from the form
210
conn.data("password").value("new_password"); // data(key, value) would append a duplicate pair instead
211
212
Document loginResult = conn.post();
213
```
214
215
### Multi-Step Form Workflows
216
217
Handle complex form workflows with sessions and state management.
218
219
```java { .api }
220
// No specific API - use Connection sessions with forms
221
```
222
223
**Usage Examples:**
224
225
```java
226
// Login workflow with session management
227
Connection session = Jsoup.newSession()
228
.userAgent("Mozilla/5.0")
229
.timeout(10000);
230
231
// Step 1: Get login form
232
Document loginPage = session.newRequest()
233
.url("https://example.com/login")
234
.get();
235
236
FormElement loginForm = (FormElement) loginPage.selectFirst("form#login");
237
238
// Step 2: Submit login form
239
Connection.Response loginResponse = loginForm.submit()
240
.data("username", "myuser")
241
.data("password", "mypass")
242
.execute();
243
244
if (loginResponse.statusCode() == 200) { // Redirects are followed by default, so a successful login ends with the final page's status
245
System.out.println("Login successful");
246
247
// Step 3: Access protected form (session maintains cookies)
248
Document protectedPage = session.newRequest()
249
.url("https://example.com/profile")
250
.get();
251
252
FormElement profileForm = (FormElement) protectedPage.selectFirst("form#profile");
253
254
// Step 4: Submit profile update
255
Document result = profileForm.submit()
256
.data("email", "new@example.com")
257
.data("name", "New Name")
258
.post();
259
}
260
```
261
262
### File Upload Forms
263
264
Handle forms with file upload capabilities.
265
266
```java { .api }
267
/**
268
* Add file upload data to form submission.
269
* @param key form field name
270
* @param filename filename for upload
271
* @param inputStream file content stream
272
* @return Connection for chaining
273
*/
274
public Connection data(String key, String filename, InputStream inputStream);
275
```
276
277
**Usage Examples:**
278
279
```java
280
FormElement uploadForm = (FormElement) doc.selectFirst("form[enctype='multipart/form-data']");
281
282
// Prepare file upload
283
FileInputStream fileStream = new FileInputStream("document.pdf");
284
285
Connection.Response response = uploadForm.submit()
286
.data("file", "document.pdf", fileStream)
287
.data("description", "Important document")
288
.execute();
289
290
// Handle upload response
291
if (response.statusCode() == 200) {
292
System.out.println("File uploaded successfully");
293
} else {
294
System.out.println("Upload failed: " + response.statusMessage());
295
}
296
297
// Always close streams (prefer try-with-resources so the stream is also closed if execute() throws)
298
fileStream.close();
299
```
300
301
### Dynamic Form Manipulation
302
303
Modify forms before submission by changing values, adding fields, or updating attributes.
304
305
**Usage Examples:**
306
307
```java
308
FormElement form = (FormElement) doc.selectFirst("form");
309
310
// Modify existing field values
311
Element usernameField = form.selectFirst("input[name=username]");
312
usernameField.val("new_username");
313
314
// Add new hidden fields
315
Element csrfToken = doc.createElement("input");
316
csrfToken.attr("type", "hidden")
317
.attr("name", "csrf_token")
318
.attr("value", "generated_token");
319
form.appendChild(csrfToken);
320
321
// Modify form attributes
322
form.attr("action", "/new-endpoint");
323
form.attr("method", "post"); // submit() issues POST only when method is "post"; any other value is sent as GET
324
325
// Submit modified form
326
Document result = form.submit().execute().parse();
327
```
328
329
### Form Validation and Processing
330
331
Extract and validate form data before processing.
332
333
**Usage Examples:**
334
335
```java
336
public class FormProcessor {
337
338
public Map<String, String> extractFormData(FormElement form) {
339
Map<String, String> data = new HashMap<>();
340
341
List<Connection.KeyVal> formData = form.formData();
342
for (Connection.KeyVal pair : formData) {
343
data.put(pair.key(), pair.value());
344
}
345
346
return data;
347
}
348
349
public boolean validateForm(FormElement form) {
350
Map<String, String> data = extractFormData(form);
351
352
// Check required fields
353
String[] requiredFields = {"username", "email", "password"};
354
for (String field : requiredFields) {
355
if (!data.containsKey(field) || data.get(field).trim().isEmpty()) {
356
System.err.println("Required field missing: " + field);
357
return false;
358
}
359
}
360
361
// Validate email format
362
String email = data.get("email");
363
if (email != null && !email.contains("@")) {
364
System.err.println("Invalid email format");
365
return false;
366
}
367
368
return true;
369
}
370
371
public Document submitFormSafely(FormElement form, Map<String, String> overrides) {
372
try {
373
if (!validateForm(form)) {
374
throw new IllegalArgumentException("Form validation failed");
375
}
376
377
Connection conn = form.submit();
378
379
// Apply overrides (note: data(key, value) appends a pair; it does not replace an existing key)
380
if (overrides != null) {
381
for (Map.Entry<String, String> entry : overrides.entrySet()) {
382
conn.data(entry.getKey(), entry.getValue());
383
}
384
}
385
386
Connection.Response response = conn.execute();
387
388
if (response.statusCode() >= 400) {
389
throw new IOException("Form submission failed: " + response.statusCode());
390
}
391
392
return response.parse();
393
394
} catch (IOException e) {
395
System.err.println("Form submission error: " + e.getMessage());
396
throw new RuntimeException(e);
397
}
398
}
399
}
400
```
401
402
### Form Data Types
403
404
Handle different types of form controls and their data extraction patterns.
405
406
```java
407
// Key-Value interface for form data
408
public interface Connection.KeyVal {
409
String key(); // Form field name
410
String value(); // Form field value
411
InputStream inputStream(); // For file uploads
412
boolean hasInputStream(); // Check if file upload
413
}
414
```
415
416
**Usage Examples:**
417
418
```java
419
FormElement form = (FormElement) doc.selectFirst("form");
420
List<Connection.KeyVal> allData = form.formData();
421
422
for (Connection.KeyVal item : allData) {
423
String fieldName = item.key();
424
String fieldValue = item.value();
425
426
if (item.hasInputStream()) {
427
System.out.println(fieldName + " is a file upload");
428
// Handle file upload data
429
} else {
430
System.out.println(fieldName + " = " + fieldValue);
431
}
432
}
433
434
// Filter specific field types
435
List<Connection.KeyVal> textFields = allData.stream()
436
.filter(kv -> !kv.hasInputStream())
437
.collect(Collectors.toList());
438
439
List<Connection.KeyVal> fileFields = allData.stream()
440
.filter(Connection.KeyVal::hasInputStream)
441
.collect(Collectors.toList());
442
```
443
444
### Error Handling and Debugging
445
446
Handle common form processing errors and debug form submissions.
447
448
**Usage Examples:**
449
450
```java
451
public class FormSubmissionHandler {
452
453
public Document submitFormWithRetry(FormElement form, int maxRetries) {
454
int attempts = 0;
455
456
while (attempts < maxRetries) {
457
try {
458
attempts++;
459
460
Connection conn = form.submit()
461
.timeout(30000)
462
.ignoreHttpErrors(true); // Return 4xx/5xx responses rather than throwing, so the status check below is reachable
463
464
Connection.Response response = conn.execute();
465
466
// Log submission details
467
System.out.println("Form submission attempt " + attempts);
468
System.out.println("Status: " + response.statusCode());
469
System.out.println("Content-Type: " + response.contentType());
470
471
if (response.statusCode() < 400) {
472
return response.parse();
473
} else {
474
System.err.println("HTTP error: " + response.statusMessage());
475
if (attempts >= maxRetries) {
476
throw new IOException("Max retries exceeded");
477
}
478
}
479
480
} catch (SocketTimeoutException e) {
481
System.err.println("Timeout on attempt " + attempts);
482
if (attempts >= maxRetries) {
483
throw new RuntimeException("Form submission timed out", e);
484
}
485
} catch (IOException e) {
486
System.err.println("IO error on attempt " + attempts + ": " + e.getMessage());
487
if (attempts >= maxRetries) {
488
throw new RuntimeException("Form submission failed", e);
489
}
490
}
491
492
// Wait before retrying
493
try {
494
Thread.sleep(1000 * attempts); // Linear backoff: 1s, 2s, 3s, ...
495
} catch (InterruptedException ie) {
496
Thread.currentThread().interrupt();
497
break;
498
}
499
}
500
501
throw new RuntimeException("Form submission failed after " + maxRetries + " attempts");
502
}
503
504
public void debugFormData(FormElement form) {
505
System.out.println("=== Form Debug Information ===");
506
System.out.println("Action: " + form.attr("action"));
507
System.out.println("Method: " + form.attr("method"));
508
System.out.println("Encoding: " + form.attr("enctype"));
509
510
Elements controls = form.elements();
511
System.out.println("Form controls (" + controls.size() + "):");
512
513
for (Element control : controls) {
514
String tag = control.tagName();
515
String type = control.attr("type");
516
String name = control.attr("name");
517
String value = control.val();
518
519
System.out.printf(" %s[type=%s, name=%s] = %s%n",
520
tag, type, name, value);
521
}
522
523
List<Connection.KeyVal> data = form.formData();
524
System.out.println("Form data (" + data.size() + " pairs):");
525
for (Connection.KeyVal pair : data) {
526
System.out.printf(" %s = %s%n", pair.key(), pair.value());
527
}
528
}
529
}
530
```
531
532
This comprehensive form handling system enables robust programmatic interaction with HTML forms, supporting everything from simple contact forms to complex multi-step workflows with file uploads and authentication.