or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

browser.md · forms.md · index.md · navigation.md · utilities.md

navigation.md (docs/)

0

# Stateful Web Navigation

1

2

High-level browser that maintains page state and provides convenient methods for navigation, link following, and multi-step web interactions. StatefulBrowser inherits from Browser and is recommended for most web automation applications.

3

4

## Capabilities

5

6

### Browser Creation

7

8

Create a StatefulBrowser instance with all Browser configuration options.

9

10

```python { .api }

11

class StatefulBrowser(Browser):

12

def __init__(self, *args, **kwargs):

13

"""

14

Create a StatefulBrowser instance.

15

All parameters are forwarded to Browser.__init__()

16

"""

17

```

18

19

**Usage Example:**

20

21

```python

22

import mechanicalsoup

23

24

# Basic stateful browser

25

browser = mechanicalsoup.StatefulBrowser()

26

27

# With custom configuration

28

browser = mechanicalsoup.StatefulBrowser(

29

raise_on_404=True,

30

user_agent="MyScript/1.0"

31

)

32

```

33

34

### Page State Properties

35

36

Access current page state including content, URL, and selected form.

37

38

```python { .api }

39

@property

40

def page(self):

41

"""Current page BeautifulSoup object (read-only)"""

42

43

@property

44

def url(self):

45

"""Current page URL string (read-only)"""

46

47

@property

48

def form(self):

49

"""Currently selected Form object (read-only)"""

50

```

51

52

**Usage Example:**

53

54

```python

55

browser = mechanicalsoup.StatefulBrowser()

56

browser.open("https://httpbin.org/html")

57

58

# Access current page content

59

print(browser.page.title.string)

60

61

# Access current URL

62

print(f"Current URL: {browser.url}")

63

64

# Access selected form (note: browser.form raises AttributeError if no form has been selected yet)

65

if browser.form:

66

print(f"Current form action: {browser.form.form.get('action')}")

67

```

68

69

### Page Navigation

70

71

Navigate to URLs and manage page state.

72

73

```python { .api }

74

def open(self, url, *args, **kwargs):

75

"""

76

Open URL and update browser state.

77

78

Parameters:

79

- url: URL to open

80

- *args, **kwargs: Forwarded to Browser.get()

81

82

Returns:

83

requests.Response with soup attribute

84

"""

85

86

def open_fake_page(self, page_text, url=None, soup_config=None):

87

"""

88

Mock page loading for testing purposes.

89

90

Parameters:

91

- page_text: HTML content as string

92

- url: Optional URL to associate with fake page

93

- soup_config: Optional BeautifulSoup config override

94

"""

95

96

def open_relative(self, url, *args, **kwargs):

97

"""

98

Open relative URL from current page.

99

100

Parameters:

101

- url: Relative URL path

102

- *args, **kwargs: Forwarded to open()

103

"""

104

105

def refresh(self):

106

"""Reload the current page"""

107

108

def absolute_url(self, url):

109

"""

110

Convert relative URL to absolute based on current page.

111

112

Parameters:

113

- url: Relative or absolute URL

114

115

Returns:

116

Absolute URL string

117

"""

118

```

119

120

**Usage Example:**

121

122

```python

123

browser = mechanicalsoup.StatefulBrowser()

124

125

# Open initial page

126

browser.open("https://httpbin.org/")

127

128

# Navigate to relative URL

129

browser.open_relative("/forms/post")

130

131

# Refresh current page

132

browser.refresh()

133

134

# Convert relative to absolute URL

135

abs_url = browser.absolute_url("../status/200")

136

print(abs_url) # https://httpbin.org/status/200

137

```

138

139

### Link Discovery and Following

140

141

Find and follow links on the current page.

142

143

```python { .api }

144

def links(self, url_regex=None, link_text=None, *args, **kwargs):

145

"""

146

Get links from current page matching criteria.

147

148

Parameters:

149

- url_regex: Regular expression to match link URLs

150

- link_text: Exact text the link must have (compared for equality, not as a substring)

151

- *args, **kwargs: Additional BeautifulSoup find parameters

152

153

Returns:

154

List of BeautifulSoup Tag objects

155

"""

156

157

def list_links(self, *args, **kwargs):

158

"""Print all links in current page for debugging"""

159

160

def find_link(self, *args, **kwargs):

161

"""

162

Find single link matching criteria.

163

164

Returns:

165

BeautifulSoup Tag object for the first matching link; raises LinkNotFoundError if no link matches

166

"""

167

168

def follow_link(self, link=None, *bs4_args, bs4_kwargs={}, requests_kwargs={}, **kwargs):

169

"""

170

Follow a link and update browser state.

171

172

Parameters:

173

- link: Link Tag object to follow, or a URL regex used to look the link up; if None, the link is located with the search parameters below

174

- bs4_args, bs4_kwargs: BeautifulSoup search parameters if link is None

175

- requests_kwargs: Parameters for the HTTP request

176

- **kwargs: Additional search parameters

177

178

Returns:

179

requests.Response with soup attribute

180

"""

181

```

182

183

**Usage Example:**

184

185

```python

186

browser = mechanicalsoup.StatefulBrowser()

187

browser.open("https://httpbin.org/")

188

189

# Get all links

190

all_links = browser.links()

191

print(f"Found {len(all_links)} links")

192

193

# Find links with specific text

194

status_links = browser.links(link_text="Status codes")

195

196

# Find link by URL pattern

197

import re

198

json_links = browser.links(url_regex=re.compile(r"/json"))

199

200

# Follow first link

201

if all_links:

202

response = browser.follow_link(all_links[0])

203

print(f"Followed to: {browser.url}")

204

205

# Follow link by search criteria

206

browser.follow_link(link_text="Forms")

207

```

208

209

### Link Download

210

211

Download link content to files.

212

213

```python { .api }

214

def download_link(self, link=None, file=None, *bs4_args, bs4_kwargs={},

215

requests_kwargs={}, **kwargs):

216

"""

217

Download link content to file.

218

219

Parameters:

220

- link: Link Tag object, or search criteria if None

221

- file: Filesystem path to write the downloaded content to (an existing file is overwritten); if None, nothing is written

222

- bs4_args, bs4_kwargs: BeautifulSoup search parameters if link is None

223

- requests_kwargs: Parameters for the HTTP request

224

- **kwargs: Additional search parameters

225

226

Returns:

227

requests.Response object

228

"""

229

```

230

231

**Usage Example:**

232

233

```python

234

browser = mechanicalsoup.StatefulBrowser()

235

browser.open("https://httpbin.org/")

236

237

# Download first link to file

238

links = browser.links()

239

if links:

240

browser.download_link(links[0], file="downloaded.html")

241

242

# Download by search criteria

243

browser.download_link(link_text="JSON", file="api_doc.html")

244

```

245

246

### Form Selection and Interaction

247

248

Select and interact with forms on the current page.

249

250

```python { .api }

251

def select_form(self, selector="form", nr=0):

252

"""

253

Select a form on the current page.

254

255

Parameters:

256

- selector: CSS selector string or a bs4.element.Tag identifying the form to select

257

- nr: Form index if multiple matches (0-based)

258

259

Returns:

260

Form object

261

"""

262

263

def submit_selected(self, btnName=None, update_state=True, **kwargs):

264

"""

265

Submit the currently selected form.

266

267

Parameters:

268

- btnName: Name of submit button to use

269

- update_state: Whether to update browser state with response

270

- **kwargs: Additional request parameters

271

272

Returns:

273

requests.Response with soup attribute

274

"""

275

276

def new_control(self, type, name, value, **kwargs):

277

"""

278

Add new control to selected form.

279

280

Parameters:

281

- type: Input type (text, hidden, etc.)

282

- name: Control name

283

- value: Control value

284

- **kwargs: Additional attributes

285

"""

286

287

def __setitem__(self, name, value):

288

"""Set form field value using bracket notation"""

289

```

290

291

**Usage Example:**

292

293

```python

294

browser = mechanicalsoup.StatefulBrowser()

295

browser.open("https://httpbin.org/forms/post")

296

297

# Select form by CSS selector

298

browser.select_form('form[action="/post"]')

299

300

# Set form fields

301

browser["custname"] = "John Doe"

302

browser["custtel"] = "555-1234"

303

304

# Add new hidden field

305

browser.new_control("hidden", "session_id", "abc123")

306

307

# Submit form

308

response = browser.submit_selected()

309

print(response.json())

310

```

311

312

### Debug and Development Tools

313

314

Tools for debugging web automation workflows.

315

316

```python { .api }

317

def set_debug(self, debug):

318

"""

319

Enable/disable debug mode.

320

321

Parameters:

322

- debug: Boolean debug flag

323

"""

324

325

def get_debug(self):

326

"""Get current debug mode status"""

327

328

def set_verbose(self, verbose):

329

"""

330

Set verbosity level.

331

332

Parameters:

333

- verbose: Verbosity level (0-2)

334

"""

335

336

def get_verbose(self):

337

"""Get current verbosity level"""

338

339

def launch_browser(self, soup=None):

340

"""

341

Launch external browser with current or specified page.

342

343

Parameters:

344

- soup: Optional BeautifulSoup object, uses current page if None

345

"""

346

```

347

348

**Usage Example:**

349

350

```python

351

browser = mechanicalsoup.StatefulBrowser()

352

353

# Enable debug mode

354

browser.set_debug(True)

355

356

# Set high verbosity

357

browser.set_verbose(2)

358

359

# Launch browser for visual debugging

360

browser.open("https://httpbin.org/forms/post")

361

browser.launch_browser() # Opens current page in system browser

362

```

363

364

### Legacy Compatibility Methods

365

366

Deprecated methods maintained for backward compatibility.

367

368

```python { .api }

369

def get_current_page(self):

370

"""Deprecated: Use .page property instead"""

371

372

def get_current_form(self):

373

"""Deprecated: Use .form property instead"""

374

375

def get_url(self):

376

"""Deprecated: Use .url property instead"""

377

```

378

379

## Complete Navigation Workflow Example

380

381

```python

382

import mechanicalsoup

383

import re

384

385

# Create browser and enable debugging

386

browser = mechanicalsoup.StatefulBrowser(user_agent="MyBot/1.0")

387

browser.set_verbose(1)

388

389

# Navigate to a form page

390

browser.open("https://httpbin.org/forms/post")

391

392

# Examine current page

393

print(f"Page title: {browser.page.title.string}")

394

print(f"Current URL: {browser.url}")

395

396

# Find and select form

397

browser.select_form()

398

399

# Fill form fields

400

browser["custname"] = "Jane Smith"

401

browser["custtel"] = "555-9876"

402

browser.form.set_radio({"size": "large"})

403

404

# Submit and follow response

405

response = browser.submit_selected()

406

print(f"Form submitted to: {browser.url}")

407

408

# Navigate using links

409

browser.open("https://httpbin.org/")

410

json_links = browser.links(url_regex=re.compile(r"/json"))

411

if json_links:

412

browser.follow_link(json_links[0])

413

print(f"JSON endpoint content: {browser.page}")

414

415

# Clean up

416

browser.close()

417

```