A Python library for automating interaction with websites, providing web scraping and form submission capabilities
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Exception classes and utility functions for error handling and form analysis. These components provide robust error handling capabilities and helper functions for advanced form processing.
Base exception classes for MechanicalSoup error handling.
class LinkNotFoundError(Exception):
    """
    Base exception for when MechanicalSoup fails to find elements.

    Raised in scenarios such as:

    - Link not found during navigation
    - Form field not found
    - 404 errors when raise_on_404=True in Browser
    - Form or element selection failures
    """
class InvalidFormMethod(LinkNotFoundError):
"""
Exception raised when form method used on wrong element type.
Inherits from LinkNotFoundError and is typically raised when:
- Attempting form operations on non-form elements
- Invalid field type operations
- Form method validation failures
"""

Usage Example:
import mechanicalsoup
browser = mechanicalsoup.StatefulBrowser(raise_on_404=True)
try:
# This will raise LinkNotFoundError if page not found
browser.open("https://httpbin.org/status/404")
except mechanicalsoup.LinkNotFoundError as e:
print(f"Page not found: {e}")
try:
# This will raise LinkNotFoundError if form not found
browser.select_form("nonexistent-form")
except mechanicalsoup.LinkNotFoundError as e:
print(f"Form selection failed: {e}")
try:
# This might raise InvalidFormMethod for invalid operations
browser.select_form()
browser.form.set("invalid_field", "value", force=False)
except mechanicalsoup.InvalidFormMethod as e:
print(f"Invalid form operation: {e}")

Utility functions for analyzing form elements and upload capabilities.
def is_multipart_file_upload(form, tag):
"""
Check if form element is a multipart file upload.
Analyzes form encoding and input types to determine if the form
supports file uploads through multipart/form-data encoding.
Parameters:
- form: BeautifulSoup form element to check
- tag: BeautifulSoup input tag to analyze
Returns:
bool: True if element supports multipart file upload, False otherwise
"""

Usage Example:
import mechanicalsoup
from mechanicalsoup import is_multipart_file_upload
browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/forms/post")
form_element = browser.page.find("form")
file_inputs = form_element.find_all("input", type="file")
for input_tag in file_inputs:
if is_multipart_file_upload(form_element, input_tag):
print(f"File upload field found: {input_tag.get('name')}")
else:
print(f"Regular input field: {input_tag.get('name')}")

Handle navigation and page loading errors gracefully.
import mechanicalsoup
def safe_navigate(browser, url, max_retries=3):
    """
    Safely navigate to URL with retry logic and error handling.

    Parameters:
    - browser: object whose open(url) returns a response exposing status_code
    - url: address passed to browser.open on every attempt
    - max_retries: maximum number of attempts (default 3)

    Returns:
    The first response whose status_code is 200. None if no attempt
    produced a 200 response and the final attempt did not raise
    (this includes max_retries <= 0, where nothing is attempted).

    Raises:
    Whatever the final attempt raised; failures on earlier attempts are
    only printed and then retried.
    """
    for attempt in range(max_retries):
        # Only the last attempt is allowed to propagate its exception.
        is_last_attempt = attempt == max_retries - 1
        try:
            response = browser.open(url)
            if response.status_code == 200:
                return response
            else:
                print(f"HTTP {response.status_code} on attempt {attempt + 1}")
        except mechanicalsoup.LinkNotFoundError as e:
            print(f"Navigation failed on attempt {attempt + 1}: {e}")
            if is_last_attempt:
                raise
        except Exception as e:
            print(f"Unexpected error on attempt {attempt + 1}: {e}")
            if is_last_attempt:
                raise
    return None
# Usage
browser = mechanicalsoup.StatefulBrowser(raise_on_404=True)
try:
response = safe_navigate(browser, "https://httpbin.org/status/500")
if response:
print("Navigation successful")
except mechanicalsoup.LinkNotFoundError:
print("All navigation attempts failed")

Handle form selection and field setting errors.
import mechanicalsoup
def safe_form_fill(browser, form_data, form_selector="form"):
    """
    Safely fill form with comprehensive error handling.

    Parameters:
    - browser: browser object supporting select_form(), the form
      attribute, and item assignment (browser[name] = value)
    - form_data: mapping of field name to the value to set
    - form_selector: selector passed to browser.select_form (default "form")

    Returns:
    bool: True once the form was selected and every field was attempted
    (individual field failures are printed, not fatal); False if form
    selection failed or an unexpected error occurred.
    """
    try:
        # Select form with error handling
        browser.select_form(form_selector)
        if not browser.form:
            raise mechanicalsoup.LinkNotFoundError(f"No form found with selector: {form_selector}")
        # Fill form fields with individual error handling
        for field_name, field_value in form_data.items():
            try:
                browser[field_name] = field_value
                print(f"Set {field_name} = {field_value}")
            except mechanicalsoup.LinkNotFoundError as e:
                # Catch the base class: a field that does not exist is
                # reported as LinkNotFoundError, and InvalidFormMethod is a
                # subclass of it. Catching only InvalidFormMethod let the
                # error escape to the outer handler, skipping the fallback.
                print(f"Failed to set field {field_name}: {e}")
                # Try force setting as fallback
                try:
                    browser.form.set(field_name, field_value, force=True)
                    print(f"Force set {field_name} = {field_value}")
                except Exception as force_error:
                    print(f"Force setting also failed for {field_name}: {force_error}")
        return True
    except mechanicalsoup.LinkNotFoundError as e:
        print(f"Form selection failed: {e}")
        return False
    except Exception as e:
        print(f"Unexpected form filling error: {e}")
        return False
# Usage
browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/forms/post")
form_data = {
"custname": "Test User",
"custtel": "555-0000",
"nonexistent_field": "test_value"
}
if safe_form_fill(browser, form_data):
try:
response = browser.submit_selected()
print("Form submitted successfully")
except Exception as e:
print(f"Form submission failed: {e}")

Handle link discovery and following with fallback strategies.
import mechanicalsoup
import re
def follow_link_safe(browser, search_criteria, fallback_url=None):
    """
    Safely follow links with multiple search strategies and fallbacks.

    Parameters:
    - browser: browser object providing find_link(), follow_link(),
      open() and a url attribute
    - search_criteria: a str (used as link_text) or a dict of keyword
      arguments forwarded to browser.find_link; any other type performs
      no search
    - fallback_url: optional URL opened when no link could be followed

    Returns:
    bool: True if a link was followed or the fallback URL was opened,
    False otherwise.
    """
    try:
        # Try to find link by various criteria
        link = None
        if isinstance(search_criteria, str):
            # Search by text
            link = browser.find_link(link_text=search_criteria)
        elif isinstance(search_criteria, dict):
            # Search by multiple criteria
            link = browser.find_link(**search_criteria)
        if link:
            browser.follow_link(link)
            print(f"Successfully followed link to: {browser.url}")
            return True
        else:
            print("Link not found with given criteria")
            # Fallback to direct URL if provided
            if fallback_url:
                print(f"Using fallback URL: {fallback_url}")
                browser.open(fallback_url)
                return True
    except mechanicalsoup.LinkNotFoundError as e:
        print(f"Link following failed: {e}")
        # This open() runs outside the try block, so an error raised
        # while opening the fallback here propagates to the caller.
        if fallback_url:
            print(f"Using fallback URL: {fallback_url}")
            browser.open(fallback_url)
            return True
    except Exception as e:
        print(f"Unexpected link following error: {e}")
    return False
# Usage
browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/")
# Try multiple search strategies
search_strategies = [
"JSON", # Search by text
{"url_regex": re.compile(r"/json")}, # Search by URL pattern
{"href": "/json"} # Search by exact href
]
for strategy in search_strategies:
if follow_link_safe(browser, strategy, fallback_url="https://httpbin.org/json"):
break

Handle file upload forms with proper multipart detection.
import mechanicalsoup
from mechanicalsoup import is_multipart_file_upload
import os
def handle_file_upload_form(browser, file_field_name, file_path, form_data=None):
    """
    Handle file upload forms with proper validation and error handling.

    Parameters:
    - browser: browser object with a selected (or selectable) form, a
      requests-style session, and absolute_url()
    - file_field_name: name attribute of the <input type="file"> to use
    - file_path: path of the local file to upload
    - form_data: optional mapping of additional field names to values,
      set on the form before the upload

    Returns:
    The response of the POST request, or None if any step failed (the
    failure is printed rather than raised).
    """
    try:
        # Ensure form is selected. Reading the attribute via getattr() also
        # covers browsers whose form property raises AttributeError when
        # nothing has been selected yet, instead of returning None.
        if not getattr(browser, "form", None):
            browser.select_form()
        form_element = browser.form.form
        file_input = form_element.find("input", {"name": file_field_name, "type": "file"})
        if not file_input:
            raise mechanicalsoup.InvalidFormMethod(f"File input '{file_field_name}' not found")
        # Verify multipart capability
        if not is_multipart_file_upload(form_element, file_input):
            print("Warning: Form may not support file uploads properly")
        # Check file exists
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")
        # Fill other form fields if provided
        if form_data:
            for field_name, field_value in form_data.items():
                browser[field_name] = field_value
        # Collect the non-file inputs to send alongside the upload. Inputs
        # without a name are skipped so no value is posted under a None key.
        data = {
            field.get('name'): field.get('value', '')
            for field in form_element.find_all('input')
            if field.get('type') != 'file' and field.get('name')
        }
        # Handle file upload (typically done through requests)
        with open(file_path, 'rb') as file:
            files = {file_field_name: file}
            response = browser.session.post(
                browser.absolute_url(form_element.get('action', '')),
                files=files,
                data=data,
            )
        print(f"File upload successful: {response.status_code}")
        return response
    except mechanicalsoup.InvalidFormMethod as e:
        print(f"Invalid form method for file upload: {e}")
    except FileNotFoundError as e:
        print(f"File error: {e}")
    except Exception as e:
        print(f"File upload error: {e}")
    return None
# Usage example (conceptual - would need actual file upload form)
browser = mechanicalsoup.StatefulBrowser()
# browser.open("https://file-upload-form.example.com")
#
# response = handle_file_upload_form(
# browser,
# file_field_name="document",
# file_path="/path/to/document.pdf",
# form_data={"description": "Important document"}
# )

Exception
└── LinkNotFoundError
    └── InvalidFormMethod

All MechanicalSoup-specific exceptions inherit from LinkNotFoundError, making it easy to catch all library-related errors:
import mechanicalsoup
browser = mechanicalsoup.StatefulBrowser(raise_on_404=True)
try:
browser.open("https://httpbin.org/status/404")
browser.select_form("nonexistent")
browser["field"] = "value"
browser.submit_selected()
except mechanicalsoup.LinkNotFoundError as e:
# Catches both LinkNotFoundError and InvalidFormMethod
print(f"MechanicalSoup operation failed: {e}")
except Exception as e:
# Catches other unexpected errors
print(f"Unexpected error: {e}")

Access the MechanicalSoup package version.
__version__: str
"""The version string of the installed MechanicalSoup package."""

Usage Example:
import mechanicalsoup
# Check the installed version
print(f"MechanicalSoup version: {mechanicalsoup.__version__}")
# Use version for compatibility checks
from packaging import version
if version.parse(mechanicalsoup.__version__) >= version.parse("1.4.0"):
print("Using modern MechanicalSoup with latest features")
else:
print("Consider upgrading MechanicalSoup")

Install with Tessl CLI
npx tessl i tessl/pypi-mechanicalsoup