# Form Fields and Interactive Elements

Comprehensive form field manipulation capabilities for reading, updating, and managing interactive PDF forms. pypdf provides robust support for working with form fields, annotations, and interactive elements in PDF documents.

## Capabilities

### Form Field Value Updates

Update form field values across pages with support for different field types and automatic appearance generation.

```python { .api }
def update_page_form_field_values(
    self,
    page: PageObject | list[PageObject] | None,
    fields: dict[str, str | list[str] | tuple[str, str, float]],
    flags: int = 0,
    auto_regenerate: bool = True,
    flatten: bool = False
) -> None:
    """
    Update form field values for given page(s) from a fields dictionary.

    Args:
        page: Page(s) of the writer to update, or None for all pages
        fields: Dictionary mapping field names to values
            - str: Simple text value
            - list[str]: Multiple values for choice fields
            - tuple[str, str, float]: (value, font_id, font_size), where
              font_id names a font that already exists in the form
              (e.g. "/F1") and a font_size of 0 requests auto-sizing
        flags: Field flag bits to set on the updated fields
        auto_regenerate: Whether to set the NeedAppearances flag so that
            viewers regenerate field appearances automatically
        flatten: Whether to flatten fields after updating, i.e. add each
            field's appearance to the page contents
    """
```

### Form Field Appearance Control

Control how form fields are rendered and displayed in PDF viewers.

```python { .api }
def set_need_appearances_writer(self, state: bool = True) -> None:
    """
    Set the NeedAppearances flag for form fields.

    The NeedAppearances flag indicates whether the PDF viewer should
    automatically generate appearances for form fields or use embedded
    appearances.

    Args:
        state: Whether to enable automatic appearance generation
    """
```

### Form Field Structure Management

Manage the hierarchical structure of form fields and handle orphaned field elements.

```python { .api }
def reattach_fields(self, page: PageObject | None = None) -> list[DictionaryObject]:
    """
    Parse page annotations to find orphan fields and reattach them
    to the document's form field structure.

    Args:
        page: Page to analyze, or None to analyze all pages

    Returns:
        List of reattached field dictionary objects
    """
```

## Usage Examples

### Basic Form Field Updates

```python
from pypdf import PdfReader, PdfWriter

# Read PDF with form fields
reader = PdfReader("form.pdf")
writer = PdfWriter()

# Copy the whole document into the writer. append() also carries over the
# /AcroForm dictionary that update_page_form_field_values() needs; copying
# pages one at a time with add_page() leaves it behind.
writer.append(reader)

# Update form field values
field_updates = {
    "FirstName": "John",
    "LastName": "Doe",
    "Email": "john.doe@example.com",
    "Age": "30",
    "Subscription": "Premium",  # Checkbox or choice field
}

# Update all pages with form fields
writer.update_page_form_field_values(None, field_updates)

# Save updated PDF
with open("filled_form.pdf", "wb") as output:
    writer.write(output)
```

### Advanced Form Field Operations

```python
from pypdf import PdfReader, PdfWriter

reader = PdfReader("complex_form.pdf")
writer = PdfWriter()
# append() copies the pages together with the /AcroForm dictionary, which
# update_page_form_field_values() requires on the writer.
writer.append(reader)

# Complex field values with formatting
advanced_fields = {
    # Simple text field
    "name": "Alice Smith",

    # Choice field with multiple selections
    "interests": ["Technology", "Science", "Art"],

    # Field with an explicit font and font size. The second element is a
    # font id, not an export value; it must name a font that exists in the
    # form ("/F1" here is illustrative).
    "salary": ("75000", "/F1", 10.0),  # (value, font_id, font_size)

    # Boolean/checkbox field
    "agree_terms": "Yes",
}

# Update with custom flags and flattening
writer.update_page_form_field_values(
    page=None,  # All pages
    fields=advanced_fields,
    flags=0,
    auto_regenerate=True,  # Generate field appearances
    flatten=True,  # Add field appearances to the page contents
)

with open("processed_form.pdf", "wb") as output:
    writer.write(output)
```

### Form Field Structure Repair

```python
from pypdf import PdfReader, PdfWriter

reader = PdfReader("damaged_form.pdf")
writer = PdfWriter()
writer.append_pages_from_reader(reader)

# Repair orphaned form fields
for page in writer.pages:
    orphaned_fields = writer.reattach_fields(page)
    if orphaned_fields:
        print(f"Reattached {len(orphaned_fields)} orphaned fields")

# Ensure appearances are properly generated
writer.set_need_appearances_writer(True)

with open("repaired_form.pdf", "wb") as output:
    writer.write(output)
```

### Batch Form Processing

```python
import json
from pathlib import Path

from pypdf import PdfReader, PdfWriter


def process_form_batch(template_path: str, data_file: str, output_dir: str) -> None:
    """Process multiple form submissions from JSON data.

    Args:
        template_path: Path of the PDF form used as the template.
        data_file: Path of a JSON file holding a sequence of submissions;
            each submission is passed as the ``fields`` mapping.
        output_dir: Directory that receives one filled PDF per submission.
            It is created if it does not exist yet.
    """
    # Load form data
    with open(data_file, "r", encoding="utf-8") as f:
        submissions = json.load(f)

    template_reader = PdfReader(template_path)

    # Build paths with pathlib so a trailing slash in output_dir does not
    # produce "dir//form_001.pdf", and make sure the directory exists.
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    for number, submission in enumerate(submissions, start=1):
        # Create new writer for each submission. append() brings the
        # template's /AcroForm along, which the field update requires.
        writer = PdfWriter()
        writer.append(template_reader)

        # Update form fields
        writer.update_page_form_field_values(
            page=None,
            fields=submission,
            auto_regenerate=True,
        )

        # Save individual form
        output_path = out_dir / f"form_{number:03d}.pdf"
        with open(output_path, "wb") as output:
            writer.write(output)

        print(f"Generated form {number}: {output_path}")


# Use batch processor
process_form_batch(
    "application_template.pdf",
    "submissions.json",
    "completed_forms/",
)
```

### Reading Existing Form Field Values

```python
from pypdf import PdfReader


def _collect_field_values(field_refs, parent_name: str, form_data: dict) -> None:
    """Record every named field under field_refs, descending into /Kids."""
    for field_ref in field_refs:
        field = field_ref.get_object()
        name = parent_name
        if '/T' in field:  # Partial field name
            partial_name = field['/T']
            # Child fields are keyed by their dotted path from the top-level
            # field; top-level fields keep their plain name.
            name = f"{parent_name}.{partial_name}" if parent_name else partial_name
            form_data[name] = field.get('/V', '')  # Field value
        if '/Kids' in field:
            _collect_field_values(field['/Kids'], name, form_data)


def extract_form_data(pdf_path: str) -> dict:
    """Extract all form field values from a PDF.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        Mapping of field name to field value ('' when a field has no
        value). Fields nested under another field are keyed as
        "parent.child". The mapping is empty when the PDF is encrypted,
        has no form, or an error occurs while reading it.
    """
    reader = PdfReader(pdf_path)
    form_data = {}

    # Encrypted PDFs are not read; report and return nothing
    if reader.is_encrypted:
        print("PDF is encrypted - decrypt first")
        return form_data

    try:
        # Access form fields through document structure
        if hasattr(reader, 'root_object') and '/AcroForm' in reader.root_object:
            acroform = reader.root_object['/AcroForm']
            if '/Fields' in acroform:
                _collect_field_values(acroform['/Fields'], '', form_data)

    except Exception as e:
        # Best-effort extraction: report the problem and return what was read
        print(f"Error extracting form data: {e}")

    return form_data


# Extract form data
data = extract_form_data("filled_form.pdf")
print("Form field values:", data)
```