0
# Stateful Web Navigation
1
2
High-level browser that maintains page state and provides convenient methods for navigation, link following, and multi-step web interactions. StatefulBrowser inherits from Browser and is recommended for most web automation applications.
3
4
## Capabilities
5
6
### Browser Creation
7
8
Create a StatefulBrowser instance with all Browser configuration options.
9
10
```python { .api }
11
class StatefulBrowser(Browser):
12
def __init__(self, *args, **kwargs):
13
"""
14
Create a StatefulBrowser instance.
15
All parameters are forwarded to Browser.__init__()
16
"""
17
```
18
19
**Usage Example:**
20
21
```python
22
import mechanicalsoup
23
24
# Basic stateful browser
25
browser = mechanicalsoup.StatefulBrowser()
26
27
# With custom configuration
28
browser = mechanicalsoup.StatefulBrowser(
29
raise_on_404=True,
30
user_agent="MyScript/1.0"
31
)
32
```
33
34
### Page State Properties
35
36
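Access current page state including content, URL, and selected form. Note that the `form` property raises `AttributeError` if no form has been selected yet with `select_form()`.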
37
38
```python { .api }
39
@property
40
def page(self):
41
"""Current page BeautifulSoup object (read-only)"""
42
43
@property
44
def url(self):
45
"""Current page URL string (read-only)"""
46
47
@property
48
def form(self):
49
"""Currently selected Form object (read-only)"""
50
```
51
52
**Usage Example:**
53
54
```python
55
browser = mechanicalsoup.StatefulBrowser()
56
browser.open("https://httpbin.org/html")
57
58
# Access current page content
59
print(browser.page.title.string)
60
61
# Access current URL
62
print(f"Current URL: {browser.url}")
63
64
# Access selected form (if any)
65
if browser.form:
66
print(f"Current form action: {browser.form.form.get('action')}")
67
```
68
69
### Page Navigation
70
71
Navigate to URLs and manage page state.
72
73
```python { .api }
74
def open(self, url, *args, **kwargs):
75
"""
76
Open URL and update browser state.
77
78
Parameters:
79
- url: URL to open
80
- *args, **kwargs: Forwarded to Browser.get()
81
82
Returns:
83
requests.Response with soup attribute
84
"""
85
86
def open_fake_page(self, page_text, url=None, soup_config=None):
87
"""
88
Mock page loading for testing purposes.
89
90
Parameters:
91
- page_text: HTML content as string
92
- url: Optional URL to associate with fake page
93
- soup_config: Optional BeautifulSoup config override
94
"""
95
96
def open_relative(self, url, *args, **kwargs):
97
"""
98
Open relative URL from current page.
99
100
Parameters:
101
- url: Relative URL path
102
- *args, **kwargs: Forwarded to open()
103
"""
104
105
def refresh(self):
106
"""Reload the current page"""
107
108
def absolute_url(self, url):
109
"""
110
Convert relative URL to absolute based on current page.
111
112
Parameters:
113
- url: Relative or absolute URL
114
115
Returns:
116
Absolute URL string
117
"""
118
```
119
120
**Usage Example:**
121
122
```python
123
browser = mechanicalsoup.StatefulBrowser()
124
125
# Open initial page
126
browser.open("https://httpbin.org/")
127
128
# Navigate to relative URL
129
browser.open_relative("/forms/post")
130
131
# Refresh current page
132
browser.refresh()
133
134
# Convert relative to absolute URL
135
abs_url = browser.absolute_url("../status/200")
136
print(abs_url) # https://httpbin.org/status/200
137
```
138
139
### Link Discovery and Following
140
141
Find and follow links on the current page.
142
143
```python { .api }
144
def links(self, url_regex=None, link_text=None, *args, **kwargs):
145
"""
146
Get links from current page matching criteria.
147
148
Parameters:
149
- url_regex: Regular expression to match link URLs
150
- link_text: Exact text of the link (compared for equality with the link's text, not as a substring)
151
- *args, **kwargs: Additional BeautifulSoup find parameters
152
153
Returns:
154
List of BeautifulSoup Tag objects
155
"""
156
157
def list_links(self, *args, **kwargs):
158
"""Print all links in current page for debugging"""
159
160
def find_link(self, *args, **kwargs):
161
"""
162
Find single link matching criteria.
163
164
Returns:
165
BeautifulSoup Tag object for the first matching link (raises LinkNotFoundError if no link matches)
166
"""
167
168
def follow_link(self, link=None, *bs4_args, bs4_kwargs={}, requests_kwargs={}, **kwargs):
169
"""
170
Follow a link and update browser state.
171
172
Parameters:
173
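- link: Link Tag object to follow; a value without an href (e.g. a regex) is used as url_regex, and None means the link is looked up using the search criteria below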
174
- bs4_args, bs4_kwargs: BeautifulSoup search parameters if link is None
175
- requests_kwargs: Parameters for the HTTP request
176
- **kwargs: Additional search parameters forwarded to find_link() (kept for backward compatibility; prefer bs4_kwargs)
177
178
Returns:
179
requests.Response with soup attribute
180
"""
181
```
182
183
**Usage Example:**
184
185
```python
186
browser = mechanicalsoup.StatefulBrowser()
187
browser.open("https://httpbin.org/")
188
189
# Get all links
190
all_links = browser.links()
191
print(f"Found {len(all_links)} links")
192
193
# Find links with specific text
194
status_links = browser.links(link_text="Status codes")
195
196
# Find link by URL pattern
197
import re
198
json_links = browser.links(url_regex=re.compile(r"/json"))
199
200
# Follow first link
201
if all_links:
202
response = browser.follow_link(all_links[0])
203
print(f"Followed to: {browser.url}")
204
205
# Follow link by search criteria
206
browser.follow_link(link_text="Forms")
207
```
208
209
### Link Download
210
211
Download link content to files.
212
213
```python { .api }
214
def download_link(self, link=None, file=None, *bs4_args, bs4_kwargs={},
215
requests_kwargs={}, **kwargs):
216
"""
217
Download link content to file.
218
219
Parameters:
220
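- link: Link Tag object to download; a value without an href (e.g. a regex) is used as url_regex, and None means the link is looked up using the search criteria below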
221
- file: Filesystem path the content is written to (an existing file is overwritten); nothing is written if None
222
- bs4_args, bs4_kwargs: BeautifulSoup search parameters if link is None
223
- requests_kwargs: Parameters for the HTTP request
224
- **kwargs: Additional search parameters forwarded to find_link() (kept for backward compatibility; prefer bs4_kwargs)
225
226
Returns:
227
requests.Response object
228
"""
229
```
230
231
**Usage Example:**
232
233
```python
234
browser = mechanicalsoup.StatefulBrowser()
235
browser.open("https://httpbin.org/")
236
237
# Download first link to file
238
links = browser.links()
239
if links:
240
browser.download_link(links[0], file="downloaded.html")
241
242
# Download by search criteria
243
browser.download_link(link_text="JSON", file="api_doc.html")
244
```
245
246
### Form Selection and Interaction
247
248
Select and interact with forms on the current page.
249
250
```python { .api }
251
def select_form(self, selector="form", nr=0):
252
"""
253
Select a form on the current page.
254
255
Parameters:
256
- selector: CSS selector string, or a bs4.element.Tag object identifying the form
257
- nr: Form index if multiple matches (0-based)
258
259
Returns:
260
Form object
261
"""
262
263
def submit_selected(self, btnName=None, update_state=True, **kwargs):
264
"""
265
Submit the currently selected form.
266
267
Parameters:
268
- btnName: Name of submit button to use
269
- update_state: Whether to update browser state with response
270
- **kwargs: Additional request parameters
271
272
Returns:
273
requests.Response with soup attribute
274
"""
275
276
def new_control(self, type, name, value, **kwargs):
277
"""
278
Add new control to selected form.
279
280
Parameters:
281
- type: Input type (text, hidden, etc.)
282
- name: Control name
283
- value: Control value
284
- **kwargs: Additional attributes
285
"""
286
287
def __setitem__(self, name, value):
288
"""Set form field value using bracket notation"""
289
```
290
291
**Usage Example:**
292
293
```python
294
browser = mechanicalsoup.StatefulBrowser()
295
browser.open("https://httpbin.org/forms/post")
296
297
# Select form by CSS selector
298
browser.select_form('form[action="/post"]')
299
300
# Set form fields
301
browser["custname"] = "John Doe"
302
browser["custtel"] = "555-1234"
303
304
# Add new hidden field
305
browser.new_control("hidden", "session_id", "abc123")
306
307
# Submit form
308
response = browser.submit_selected()
309
print(response.json())
310
```
311
312
### Debug and Development Tools
313
314
Tools for debugging web automation workflows.
315
316
```python { .api }
317
def set_debug(self, debug):
318
"""
319
Enable/disable debug mode.
320
321
Parameters:
322
- debug: Boolean debug flag
323
"""
324
325
def get_debug(self):
326
"""Get current debug mode status"""
327
328
def set_verbose(self, verbose):
329
"""
330
Set verbosity level.
331
332
Parameters:
333
- verbose: Verbosity level (0-2)
334
"""
335
336
def get_verbose(self):
337
"""Get current verbosity level"""
338
339
def launch_browser(self, soup=None):
340
"""
341
Launch external browser with current or specified page.
342
343
Parameters:
344
- soup: Optional BeautifulSoup object, uses current page if None
345
"""
346
```
347
348
**Usage Example:**
349
350
```python
351
browser = mechanicalsoup.StatefulBrowser()
352
353
# Enable debug mode
354
browser.set_debug(True)
355
356
# Set high verbosity
357
browser.set_verbose(2)
358
359
# Launch browser for visual debugging
360
browser.open("https://httpbin.org/forms/post")
361
browser.launch_browser() # Opens current page in system browser
362
```
363
364
### Legacy Compatibility Methods
365
366
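Deprecated methods maintained for backward compatibility. Unlike the `form` property, which raises `AttributeError`, `get_current_form()` returns None when no form has been selected.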
367
368
```python { .api }
369
def get_current_page(self):
370
"""Deprecated: Use .page property instead"""
371
372
def get_current_form(self):
373
"""Deprecated: Use .form property instead"""
374
375
def get_url(self):
376
"""Deprecated: Use .url property instead"""
377
```
378
379
## Complete Navigation Workflow Example
380
381
```python
382
import mechanicalsoup
383
import re
384
385
# Create browser and enable debugging
386
browser = mechanicalsoup.StatefulBrowser(user_agent="MyBot/1.0")
387
browser.set_verbose(1)
388
389
# Navigate to a form page
390
browser.open("https://httpbin.org/forms/post")
391
392
# Examine current page
393
print(f"Page title: {browser.page.title.string}")
394
print(f"Current URL: {browser.url}")
395
396
# Find and select form
397
browser.select_form()
398
399
# Fill form fields
400
browser["custname"] = "Jane Smith"
401
browser["custtel"] = "555-9876"
402
browser.form.set_radio({"size": "large"})
403
404
# Submit and follow response
405
response = browser.submit_selected()
406
print(f"Form submitted to: {browser.url}")
407
408
# Navigate using links
409
browser.open("https://httpbin.org/")
410
json_links = browser.links(url_regex=re.compile(r"/json"))
411
if json_links:
412
browser.follow_link(json_links[0])
413
print(f"JSON endpoint content: {browser.page}")
414
415
# Clean up
416
browser.close()
417
```