A Python library for automating interaction with websites, providing web scraping and form submission capabilities
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
High-level browser that maintains page state and provides convenient methods for navigation, link following, and multi-step web interactions. StatefulBrowser inherits from Browser and is recommended for most web automation applications.
Create a StatefulBrowser instance with all Browser configuration options.
class StatefulBrowser(Browser):
def __init__(self, *args, **kwargs):
"""
Create a StatefulBrowser instance.
All parameters are forwarded to Browser.__init__()
"""

Usage Example:
import mechanicalsoup
# Basic stateful browser
browser = mechanicalsoup.StatefulBrowser()
# With custom configuration
browser = mechanicalsoup.StatefulBrowser(
raise_on_404=True,
user_agent="MyScript/1.0"
)

Access current page state including content, URL, and selected form.
@property
def page(self):
"""Current page BeautifulSoup object (read-only)"""
@property
def url(self):
"""Current page URL string (read-only)"""
@property
def form(self):
"""Currently selected Form object (read-only)"""

Usage Example:
browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/html")
# Access current page content
print(browser.page.title.string)
# Access current URL
print(f"Current URL: {browser.url}")
# Access selected form (if any)
if browser.form:
print(f"Current form action: {browser.form.form.get('action')}")

Navigate to URLs and manage page state.
def open(self, url, *args, **kwargs):
"""
Open URL and update browser state.
Parameters:
- url: URL to open
- *args, **kwargs: Forwarded to Browser.get()
Returns:
requests.Response with soup attribute
"""
def open_fake_page(self, page_text, url=None, soup_config=None):
"""
Mock page loading for testing purposes.
Parameters:
- page_text: HTML content as string
- url: Optional URL to associate with fake page
- soup_config: Optional BeautifulSoup config override
"""
def open_relative(self, url, *args, **kwargs):
"""
Open relative URL from current page.
Parameters:
- url: Relative URL path
- *args, **kwargs: Forwarded to open()
"""
def refresh(self):
"""Reload the current page"""
def absolute_url(self, url):
"""
Convert relative URL to absolute based on current page.
Parameters:
- url: Relative or absolute URL
Returns:
Absolute URL string
"""

Usage Example:
browser = mechanicalsoup.StatefulBrowser()
# Open initial page
browser.open("https://httpbin.org/")
# Navigate to relative URL
browser.open_relative("/forms/post")
# Refresh current page
browser.refresh()
# Convert relative to absolute URL
abs_url = browser.absolute_url("../status/200")
print(abs_url) # https://httpbin.org/status/200

Find and follow links on the current page.
def links(self, url_regex=None, link_text=None, *args, **kwargs):
"""
Get links from current page matching criteria.
Parameters:
- url_regex: Regular expression to match link URLs
- link_text: Text content to match in link text
- *args, **kwargs: Additional BeautifulSoup find parameters
Returns:
List of BeautifulSoup Tag objects
"""
def list_links(self, *args, **kwargs):
"""Print all links in current page for debugging"""
def find_link(self, *args, **kwargs):
"""
Find single link matching criteria.
Returns:
BeautifulSoup Tag object or None
"""
def follow_link(self, link=None, *bs4_args, bs4_kwargs={}, requests_kwargs={}, **kwargs):
"""
Follow a link and update browser state.
Parameters:
- link: Link Tag object, or search criteria if None
- bs4_args, bs4_kwargs: BeautifulSoup search parameters if link is None
- requests_kwargs: Parameters for the HTTP request
- **kwargs: Additional search parameters
Returns:
requests.Response with soup attribute
"""

Usage Example:
browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/")
# Get all links
all_links = browser.links()
print(f"Found {len(all_links)} links")
# Find links with specific text
status_links = browser.links(link_text="Status codes")
# Find link by URL pattern
import re
json_links = browser.links(url_regex=re.compile(r"/json"))
# Follow first link
if all_links:
response = browser.follow_link(all_links[0])
print(f"Followed to: {browser.url}")
# Follow link by search criteria
browser.follow_link(link_text="Forms")

Download link content to files.
def download_link(self, link=None, file=None, *bs4_args, bs4_kwargs={},
requests_kwargs={}, **kwargs):
"""
Download link content to file.
Parameters:
- link: Link Tag object, or search criteria if None
- file: File path or file-like object for output
- bs4_args, bs4_kwargs: BeautifulSoup search parameters if link is None
- requests_kwargs: Parameters for the HTTP request
- **kwargs: Additional search parameters
Returns:
requests.Response object
"""

Usage Example:
browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/")
# Download first link to file
links = browser.links()
if links:
browser.download_link(links[0], file="downloaded.html")
# Download by search criteria
browser.download_link(link_text="JSON", file="api_doc.html")

Select and interact with forms on the current page.
def select_form(self, selector="form", nr=0):
"""
Select a form on the current page.
Parameters:
- selector: CSS selector or BeautifulSoup search criteria
- nr: Form index if multiple matches (0-based)
Returns:
Form object
"""
def submit_selected(self, btnName=None, update_state=True, **kwargs):
"""
Submit the currently selected form.
Parameters:
- btnName: Name of submit button to use
- update_state: Whether to update browser state with response
- **kwargs: Additional request parameters
Returns:
requests.Response with soup attribute
"""
def new_control(self, type, name, value, **kwargs):
"""
Add new control to selected form.
Parameters:
- type: Input type (text, hidden, etc.)
- name: Control name
- value: Control value
- **kwargs: Additional attributes
"""
def __setitem__(self, name, value):
"""Set form field value using bracket notation"""

Usage Example:
browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/forms/post")
# Select form by CSS selector
browser.select_form('form[action="/post"]')
# Set form fields
browser["custname"] = "John Doe"
browser["custtel"] = "555-1234"
# Add new hidden field
browser.new_control("hidden", "session_id", "abc123")
# Submit form
response = browser.submit_selected()
print(response.json())

Tools for debugging web automation workflows.
def set_debug(self, debug):
"""
Enable/disable debug mode.
Parameters:
- debug: Boolean debug flag
"""
def get_debug(self):
"""Get current debug mode status"""
def set_verbose(self, verbose):
"""
Set verbosity level.
Parameters:
- verbose: Verbosity level (0-2)
"""
def get_verbose(self):
"""Get current verbosity level"""
def launch_browser(self, soup=None):
"""
Launch external browser with current or specified page.
Parameters:
- soup: Optional BeautifulSoup object, uses current page if None
"""

Usage Example:
browser = mechanicalsoup.StatefulBrowser()
# Enable debug mode
browser.set_debug(True)
# Set high verbosity
browser.set_verbose(2)
# Launch browser for visual debugging
browser.open("https://httpbin.org/forms/post")
browser.launch_browser() # Opens current page in system browser

Deprecated methods maintained for backward compatibility.
def get_current_page(self):
"""Deprecated: Use .page property instead"""
def get_current_form(self):
"""Deprecated: Use .form property instead"""
def get_url(self):
"""Deprecated: Use .url property instead"""

import mechanicalsoup
import re
# Create browser and enable debugging
browser = mechanicalsoup.StatefulBrowser(user_agent="MyBot/1.0")
browser.set_verbose(1)
# Navigate to a form page
browser.open("https://httpbin.org/forms/post")
# Examine current page
print(f"Page title: {browser.page.title.string}")
print(f"Current URL: {browser.url}")
# Find and select form
browser.select_form()
# Fill form fields
browser["custname"] = "Jane Smith"
browser["custtel"] = "555-9876"
browser.form.set_radio({"size": "large"})
# Submit and follow response
response = browser.submit_selected()
print(f"Form submitted to: {browser.url}")
# Navigate using links
browser.open("https://httpbin.org/")
json_links = browser.links(url_regex=re.compile(r"/json"))
if json_links:
browser.follow_link(json_links[0])
print(f"JSON endpoint content: {browser.page}")
# Clean up
browser.close()

Install with Tessl CLI
npx tessl i tessl/pypi-mechanicalsoup