CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-mechanicalsoup

A Python library for automating interaction with websites, providing web scraping and form submission capabilities

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/navigation.md

Stateful Web Navigation

StatefulBrowser is a high-level browser that maintains page state (the current page, URL, and selected form) and provides convenient methods for navigation, link following, and multi-step web interactions. It inherits from Browser and is recommended for most web automation applications.

Capabilities

Browser Creation

Create a StatefulBrowser instance with all Browser configuration options.

class StatefulBrowser(Browser):
    def __init__(self, *args, **kwargs):
        """
        Create a StatefulBrowser instance.
        All parameters are forwarded to Browser.__init__()
        """

Usage Example:

import mechanicalsoup

# Basic stateful browser
browser = mechanicalsoup.StatefulBrowser()

# With custom configuration
browser = mechanicalsoup.StatefulBrowser(
    raise_on_404=True,
    user_agent="MyScript/1.0"
)

Page State Properties

Access current page state including content, URL, and selected form.

@property
def page(self):
    """Current page BeautifulSoup object (read-only)"""
    
@property 
def url(self):
    """Current page URL string (read-only)"""
    
@property
def form(self):
    """Currently selected Form object (read-only)"""

Usage Example:

browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/html")

# Access current page content
print(browser.page.title.string)

# Access current URL
print(f"Current URL: {browser.url}")

# Access selected form (if any)
if browser.form:
    print(f"Current form action: {browser.form.form.get('action')}")

Page Navigation

Navigate to URLs and manage page state.

def open(self, url, *args, **kwargs):
    """
    Open URL and update browser state.
    
    Parameters:
    - url: URL to open
    - *args, **kwargs: Forwarded to Browser.get()
    
    Returns:
    requests.Response with soup attribute
    """

def open_fake_page(self, page_text, url=None, soup_config=None):
    """
    Mock page loading for testing purposes.
    
    Parameters:
    - page_text: HTML content as string
    - url: Optional URL to associate with fake page
    - soup_config: Optional BeautifulSoup config override
    """

def open_relative(self, url, *args, **kwargs):
    """
    Open relative URL from current page.
    
    Parameters:
    - url: Relative URL path
    - *args, **kwargs: Forwarded to open()
    """

def refresh(self):
    """Reload the current page"""

def absolute_url(self, url):
    """
    Convert relative URL to absolute based on current page.
    
    Parameters:
    - url: Relative or absolute URL
    
    Returns:
    Absolute URL string
    """

Usage Example:

browser = mechanicalsoup.StatefulBrowser()

# Open initial page
browser.open("https://httpbin.org/")

# Navigate to relative URL
browser.open_relative("/forms/post")

# Refresh current page
browser.refresh()

# Convert relative to absolute URL
abs_url = browser.absolute_url("../status/200")
print(abs_url)  # https://httpbin.org/status/200

Link Discovery and Following

Find and follow links on the current page.

def links(self, url_regex=None, link_text=None, *args, **kwargs):
    """
    Get links from current page matching criteria.
    
    Parameters:
    - url_regex: Regular expression to match link URLs
    - link_text: Exact text a link must have to be included
    - *args, **kwargs: Additional BeautifulSoup find parameters
    
    Returns:
    List of BeautifulSoup Tag objects
    """

def list_links(self, *args, **kwargs):
    """Print all links in current page for debugging"""

def find_link(self, *args, **kwargs):
    """
    Find single link matching criteria.
    
    Returns:
    The first matching BeautifulSoup Tag object

    Raises:
    LinkNotFoundError if no link matches the criteria
    """

def follow_link(self, link=None, *bs4_args, bs4_kwargs={}, requests_kwargs={}, **kwargs):
    """
    Follow a link and update browser state.
    
    Parameters:
    - link: Link Tag object, or search criteria if None
    - bs4_args, bs4_kwargs: BeautifulSoup search parameters if link is None
    - requests_kwargs: Parameters for the HTTP request
    - **kwargs: Additional search parameters
    
    Returns:
    requests.Response with soup attribute
    """

Usage Example:

browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/")

# Get all links
all_links = browser.links()
print(f"Found {len(all_links)} links")

# Find links with specific text
status_links = browser.links(link_text="Status codes")

# Find link by URL pattern
import re
json_links = browser.links(url_regex=re.compile(r"/json"))

# Follow first link
if all_links:
    response = browser.follow_link(all_links[0])
    print(f"Followed to: {browser.url}")

# Follow link by search criteria
browser.follow_link(link_text="Forms")

Link Download

Download link content to files.

def download_link(self, link=None, file=None, *bs4_args, bs4_kwargs={}, 
                  requests_kwargs={}, **kwargs):
    """
    Download link content to file.
    
    Parameters:
    - link: Link Tag object, or search criteria if None
    - file: Filesystem path to write the downloaded content to (an existing file is overwritten)
    - bs4_args, bs4_kwargs: BeautifulSoup search parameters if link is None
    - requests_kwargs: Parameters for the HTTP request
    - **kwargs: Additional search parameters
    
    Returns:
    requests.Response object
    """

Usage Example:

browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/")

# Download first link to file
links = browser.links()
if links:
    browser.download_link(links[0], file="downloaded.html")

# Download by search criteria
browser.download_link(link_text="JSON", file="api_doc.html")

Form Selection and Interaction

Select and interact with forms on the current page.

def select_form(self, selector="form", nr=0):
    """
    Select a form on the current page.
    
    Parameters:
    - selector: CSS selector or BeautifulSoup search criteria
    - nr: Form index if multiple matches (0-based)
    
    Returns:
    Form object
    """

def submit_selected(self, btnName=None, update_state=True, **kwargs):
    """
    Submit the currently selected form.
    
    Parameters:
    - btnName: Name of submit button to use
    - update_state: Whether to update browser state with response
    - **kwargs: Additional request parameters
    
    Returns:
    requests.Response with soup attribute
    """

def new_control(self, type, name, value, **kwargs):
    """
    Add new control to selected form.
    
    Parameters:
    - type: Input type (text, hidden, etc.)
    - name: Control name
    - value: Control value
    - **kwargs: Additional attributes
    """

def __setitem__(self, name, value):
    """Set form field value using bracket notation"""

Usage Example:

browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/forms/post")

# Select form by CSS selector
browser.select_form('form[action="/post"]')

# Set form fields
browser["custname"] = "John Doe"
browser["custtel"] = "555-1234"

# Add new hidden field
browser.new_control("hidden", "session_id", "abc123")

# Submit form
response = browser.submit_selected()
print(response.json())

Debug and Development Tools

Tools for debugging web automation workflows.

def set_debug(self, debug):
    """
    Enable/disable debug mode.
    
    Parameters:
    - debug: Boolean debug flag
    """

def get_debug(self):
    """Get current debug mode status"""

def set_verbose(self, verbose):
    """
    Set verbosity level.
    
    Parameters:
    - verbose: Verbosity level (0-2)
    """

def get_verbose(self):
    """Get current verbosity level"""

def launch_browser(self, soup=None):
    """
    Launch external browser with current or specified page.
    
    Parameters:
    - soup: Optional BeautifulSoup object, uses current page if None
    """

Usage Example:

browser = mechanicalsoup.StatefulBrowser()

# Enable debug mode
browser.set_debug(True)

# Set high verbosity
browser.set_verbose(2)

# Launch browser for visual debugging
browser.open("https://httpbin.org/forms/post")
browser.launch_browser()  # Opens current page in system browser

Legacy Compatibility Methods

Deprecated methods maintained for backward compatibility.

def get_current_page(self):
    """Deprecated: Use .page property instead"""

def get_current_form(self):
    """Deprecated: Use .form property instead"""

def get_url(self):
    """Deprecated: Use .url property instead"""

Complete Navigation Workflow Example

import mechanicalsoup
import re

# Create browser and set the verbosity level
browser = mechanicalsoup.StatefulBrowser(user_agent="MyBot/1.0")
browser.set_verbose(1)

# Navigate to a form page
browser.open("https://httpbin.org/forms/post")

# Examine current page
print(f"Page title: {browser.page.title.string}")
print(f"Current URL: {browser.url}")

# Find and select form
browser.select_form()

# Fill form fields
browser["custname"] = "Jane Smith"
browser["custtel"] = "555-9876"
browser.form.set_radio({"size": "large"})

# Submit and follow response
response = browser.submit_selected()
print(f"Form submitted to: {browser.url}")

# Navigate using links
browser.open("https://httpbin.org/")
json_links = browser.links(url_regex=re.compile(r"/json"))
if json_links:
    browser.follow_link(json_links[0])
    print(f"JSON endpoint content: {browser.page}")

# Clean up
browser.close()

Install with Tessl CLI

npx tessl i tessl/pypi-mechanicalsoup

docs

browser.md

forms.md

index.md

navigation.md

utilities.md

tile.json