CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-mechanicalsoup

A Python library for automating interaction with websites, providing web scraping and form submission capabilities

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

utilities.mddocs/

Utilities and Error Handling

Exception classes and utility functions for error handling and form analysis. These components provide robust error handling capabilities and helper functions for advanced form processing.

Capabilities

Exception Classes

Base exception classes for MechanicalSoup error handling.

class LinkNotFoundError(Exception):
    """
    Root MechanicalSoup exception: an element the library looked for is missing.

    Typical triggers include:
    - A link could not be located while navigating
    - A form field could not be located
    - A 404 response when the Browser was created with raise_on_404=True
    - A form or other element could not be selected
    """

class InvalidFormMethod(LinkNotFoundError):
    """
    Raised when a form method is applied to an element of the wrong type.

    Being a subclass of LinkNotFoundError, it is also caught by handlers
    for that base class. Typical triggers include:
    - Running a form operation against an element that is not a form
    - Using an operation that does not fit the field's type
    - A form method failing its validation
    """

Usage Example:

import mechanicalsoup

browser = mechanicalsoup.StatefulBrowser(raise_on_404=True)

try:
    # With raise_on_404=True a 404 response raises LinkNotFoundError
    browser.open("https://httpbin.org/status/404")
except mechanicalsoup.LinkNotFoundError as e:
    print(f"Page not found: {e}")

# The failed open above leaves the browser without a loaded page, so load
# one that contains a form before demonstrating the form-related errors.
browser.open("https://httpbin.org/forms/post")

try:
    # Nothing on the page matches this selector -> LinkNotFoundError
    browser.select_form("nonexistent-form")
except mechanicalsoup.LinkNotFoundError as e:
    print(f"Form selection failed: {e}")

try:
    browser.select_form()
    # The form has no field with this name, and force=False forbids
    # creating one, so the call fails.
    browser.form.set("invalid_field", "value", force=False)
except mechanicalsoup.InvalidFormMethod as e:
    # Listed first: it is the more specific (derived) exception.
    print(f"Invalid form operation: {e}")
except mechanicalsoup.LinkNotFoundError as e:
    # Form.set() reports an unknown field with the base LinkNotFoundError,
    # which a handler for InvalidFormMethod alone would let escape.
    print(f"Form field not found: {e}")

Form Analysis Utilities

Utility functions for analyzing form elements and upload capabilities.

def is_multipart_file_upload(form, tag):
    """
    Tell whether ``tag`` is a file-upload input in a multipart form.

    The decision is based on the form's encoding (multipart/form-data)
    together with the type of the input.

    Parameters:
    - form: the BeautifulSoup form element being examined
    - tag: the BeautifulSoup input tag inside that form

    Returns:
    bool: True when the element supports multipart file upload,
    otherwise False
    """

Usage Example:

import mechanicalsoup
# Import the helper from the utils submodule: this works whether or not the
# package also re-exports it at the top level.
from mechanicalsoup.utils import is_multipart_file_upload

browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/forms/post")

form_element = browser.page.find("form")

# Walk over every input (not only type="file") so that the "regular input"
# branch below is accurate for the fields it reports.
for input_tag in form_element.find_all("input"):
    if is_multipart_file_upload(form_element, input_tag):
        print(f"File upload field found: {input_tag.get('name')}")
    else:
        print(f"Regular input field: {input_tag.get('name')}")

Error Handling Patterns

Graceful Navigation Error Handling

Handle navigation and page loading errors gracefully.

import mechanicalsoup

def safe_navigate(browser, url, max_retries=3):
    """
    Open ``url`` with ``browser``, making at most ``max_retries`` attempts.

    Returns the first response whose status code is 200. Returns None when
    every attempt finished with a different status code. An exception
    raised on an earlier attempt is printed and the next attempt is made;
    an exception raised on the last attempt is re-raised to the caller.
    """
    last_index = max_retries - 1

    for index in range(max_retries):
        attempt_number = index + 1
        try:
            result = browser.open(url)
            if result.status_code == 200:
                return result
            print(f"HTTP {result.status_code} on attempt {attempt_number}")
        except mechanicalsoup.LinkNotFoundError as error:
            print(f"Navigation failed on attempt {attempt_number}: {error}")
            # Out of attempts: let the caller see the failure.
            if index == last_index:
                raise
        except Exception as error:
            print(f"Unexpected error on attempt {attempt_number}: {error}")
            if index == last_index:
                raise

    return None

# Usage
browser = mechanicalsoup.StatefulBrowser(raise_on_404=True)
try:
    response = safe_navigate(browser, "https://httpbin.org/status/500")
except mechanicalsoup.LinkNotFoundError:
    print("All navigation attempts failed")
else:
    # safe_navigate returns None when every attempt ended with a non-200
    # status (the case for this /status/500 URL), so report that outcome
    # instead of finishing silently.
    if response is not None:
        print("Navigation successful")
    else:
        print("Navigation gave up: no attempt returned HTTP 200")

Form Operation Error Handling

Handle form selection and field setting errors.

import mechanicalsoup

def safe_form_fill(browser, form_data, form_selector="form"):
    """
    Select a form and fill it, reporting problems instead of raising.

    Parameters:
    - browser: browser object providing select_form(), the ``form``
      attribute and item assignment (``browser[name] = value``)
    - form_data: mapping of field name to the value to set
    - form_selector: selector passed to select_form() (default "form")

    Returns:
    bool: True once the form was selected and every field was attempted
    (a field that cannot be set is reported and skipped); False when the
    form could not be selected or an unexpected error occurred.
    """
    try:
        # Select form with error handling
        browser.select_form(form_selector)
        if not browser.form:
            raise mechanicalsoup.LinkNotFoundError(f"No form found with selector: {form_selector}")

        # Fill form fields with individual error handling
        for field_name, field_value in form_data.items():
            try:
                browser[field_name] = field_value
                print(f"Set {field_name} = {field_value}")
            except mechanicalsoup.LinkNotFoundError as e:
                # Catch the base class: a field that does not exist is
                # reported with LinkNotFoundError, and InvalidFormMethod is
                # a subclass of it, so both end up here. Catching only
                # InvalidFormMethod would send a missing field to the outer
                # handler and abort the whole fill as a "selection" failure.
                print(f"Failed to set field {field_name}: {e}")
                # Try force setting as fallback
                try:
                    browser.form.set(field_name, field_value, force=True)
                    print(f"Force set {field_name} = {field_value}")
                except Exception as force_error:
                    print(f"Force setting also failed for {field_name}: {force_error}")

        return True

    except mechanicalsoup.LinkNotFoundError as e:
        print(f"Form selection failed: {e}")
        return False
    except Exception as e:
        print(f"Unexpected form filling error: {e}")
        return False

# Usage
browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/forms/post")

# The last entry names a field the form does not have, which exercises the
# per-field error handling inside safe_form_fill.
form_data = dict(
    custname="Test User",
    custtel="555-0000",
    nonexistent_field="test_value",
)

filled = safe_form_fill(browser, form_data)
if filled:
    try:
        response = browser.submit_selected()
        print("Form submitted successfully")
    except Exception as error:
        print(f"Form submission failed: {error}")

Link Following Error Handling

Handle link discovery and following with fallback strategies.

import mechanicalsoup
import re

def follow_link_safe(browser, search_criteria, fallback_url=None):
    """
    Follow a link found by ``search_criteria``, optionally falling back to a URL.

    Parameters:
    - browser: browser object providing find_link(), follow_link(), open()
      and the ``url`` attribute
    - search_criteria: a str (matched as link text) or a dict of keyword
      arguments passed to find_link(); any other type finds nothing
    - fallback_url: URL opened directly when no link is found (optional)

    Returns:
    bool: True when a link was followed or the fallback URL was opened;
    False otherwise. No exception raised while searching, following or
    opening the fallback is propagated to the caller.
    """
    link = None
    try:
        if isinstance(search_criteria, str):
            # Search by text
            link = browser.find_link(link_text=search_criteria)
        elif isinstance(search_criteria, dict):
            # Search by multiple criteria
            link = browser.find_link(**search_criteria)

        if link is not None:
            browser.follow_link(link)
            print(f"Successfully followed link to: {browser.url}")
            return True

        print("Link not found with given criteria")
    except mechanicalsoup.LinkNotFoundError as e:
        # A missing link is an expected outcome; continue to the fallback.
        print(f"Link following failed: {e}")
    except Exception as e:
        # Anything else is unexpected: report it and do not use the fallback.
        print(f"Unexpected link following error: {e}")
        return False

    if not fallback_url:
        return False

    # Single fallback path with its own guard, so a failure while opening
    # the fallback is reported the same way regardless of how the link
    # search ended, and the fallback is never attempted twice.
    print(f"Using fallback URL: {fallback_url}")
    try:
        browser.open(fallback_url)
    except Exception as e:
        print(f"Fallback navigation failed: {e}")
        return False
    return True

# Usage
browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/")

# Try multiple search strategies
search_strategies = [
    "JSON",  # Search by text
    {"url_regex": re.compile(r"/json")},  # Search by URL pattern
    # Exact href, written as an anchored pattern: the link search already
    # filters on the href attribute itself, so passing an extra ``href``
    # keyword would collide with it.
    {"url_regex": r"^/json$"},
]

# Give every strategy a chance before falling back: handing the fallback URL
# to each call would make the first miss count as success and end the loop.
for strategy in search_strategies:
    if follow_link_safe(browser, strategy):
        break
else:
    print("No strategy matched; opening the fallback URL")
    browser.open("https://httpbin.org/json")

File Upload Handling

Handle file upload forms with proper multipart detection.

import mechanicalsoup
from mechanicalsoup import is_multipart_file_upload
import os

def handle_file_upload_form(browser, file_field_name, file_path, form_data=None):
    """
    Upload a file through the browser's form, reporting errors instead of raising.

    Parameters:
    - browser: browser object with a selected (or selectable) form
    - file_field_name: ``name`` of the ``<input type="file">`` to use
    - file_path: path of the file to upload
    - form_data: optional mapping of other field names to values, set on
      the form before posting

    Returns:
    The response of the POST request, or None when the file input is
    missing, the file does not exist, or any other error occurs (the
    error is printed).
    """
    try:
        # Ensure form is selected. Reading ``browser.form`` before any form
        # has been selected can raise AttributeError rather than yield a
        # falsy value; getattr with a default copes with both behaviours.
        if not getattr(browser, "form", None):
            browser.select_form()

        form_element = browser.form.form
        file_input = form_element.find("input", {"name": file_field_name, "type": "file"})

        if file_input is None:
            raise mechanicalsoup.InvalidFormMethod(f"File input '{file_field_name}' not found")

        # Verify multipart capability
        if not is_multipart_file_upload(form_element, file_input):
            print("Warning: Form may not support file uploads properly")

        # Check file exists
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        # Fill other form fields if provided
        if form_data:
            for field_name, field_value in form_data.items():
                browser[field_name] = field_value

        # Collect the non-file inputs to send alongside the file. Inputs
        # without a name are skipped so no field is posted under a None key.
        # NOTE(review): only <input> elements are collected here; select and
        # textarea controls, and checked/unchecked state, are not considered.
        other_fields = {
            field.get('name'): field.get('value', '')
            for field in form_element.find_all('input')
            if field.get('type') != 'file' and field.get('name')
        }
        target_url = browser.absolute_url(form_element.get('action', ''))

        # Handle file upload (typically done through requests); the file is
        # kept open only for the duration of the request.
        with open(file_path, 'rb') as file:
            response = browser.session.post(
                target_url,
                files={file_field_name: file},
                data=other_fields,
            )

        print(f"File upload successful: {response.status_code}")
        return response

    except mechanicalsoup.InvalidFormMethod as e:
        print(f"Invalid form method for file upload: {e}")
    except FileNotFoundError as e:
        print(f"File error: {e}")
    except Exception as e:
        print(f"File upload error: {e}")

    return None

# Usage example (conceptual - would need actual file upload form)
# The calls below are left commented out because the URL is a placeholder;
# point them at a page with a real file upload form before enabling them.
browser = mechanicalsoup.StatefulBrowser()
# browser.open("https://file-upload-form.example.com")
#
# response = handle_file_upload_form(
#     browser, 
#     file_field_name="document",
#     file_path="/path/to/document.pdf",
#     form_data={"description": "Important document"}
# )

Exception Hierarchy

Exception
└── LinkNotFoundError
    └── InvalidFormMethod

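All MechanicalSoup-specific exceptions inherit from LinkNotFoundError, so a single handler for it catches every error the library itself raises (exceptions from the underlying HTTP layer, such as connection failures, are separate and need their own handler):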

import mechanicalsoup

browser = mechanicalsoup.StatefulBrowser(raise_on_404=True)

try:
    browser.open("https://httpbin.org/status/404")
    browser.select_form("nonexistent")
    browser["field"] = "value"
    browser.submit_selected()
except mechanicalsoup.LinkNotFoundError as library_error:
    # One handler for the base class also receives InvalidFormMethod,
    # because that exception derives from LinkNotFoundError.
    print(f"MechanicalSoup operation failed: {library_error}")
except Exception as other_error:
    # Anything that is not a MechanicalSoup exception lands here.
    print(f"Unexpected error: {other_error}")

Version Information

Access the MechanicalSoup package version.

__version__: str
    """The version string of the installed MechanicalSoup package."""

Usage Example:

import mechanicalsoup

# Report the installed version
installed_version = mechanicalsoup.__version__
print(f"MechanicalSoup version: {installed_version}")

# Compare parsed versions rather than raw strings for the compatibility check
from packaging import version
is_modern = version.parse(installed_version) >= version.parse("1.4.0")
print(
    "Using modern MechanicalSoup with latest features"
    if is_modern
    else "Consider upgrading MechanicalSoup"
)

Install with Tessl CLI

npx tessl i tessl/pypi-mechanicalsoup

docs

browser.md

forms.md

index.md

navigation.md

utilities.md

tile.json