CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-mechanicalsoup

A Python library for automating interaction with websites, providing web scraping and form submission capabilities

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

forms.mddocs/

Form Handling

HTML form manipulation and field setting capabilities. The Form class provides comprehensive support for all standard form elements including inputs, checkboxes, radio buttons, selects, and textareas with automatic type detection and validation.

Capabilities

Form Creation

Create Form instances from BeautifulSoup form elements.

class Form:
    """Wrapper around a BeautifulSoup form element, used to fill in its fields."""

    def __init__(self, form):
        """
        Create a Form instance from a BeautifulSoup form element.
        
        Parameters:
        - form: bs4.element.Tag representing a form element; it stays
                reachable afterwards as the instance's ``form`` attribute
        """

Usage Example:

import mechanicalsoup
from mechanicalsoup import Form

browser = mechanicalsoup.StatefulBrowser()
response = browser.open("https://httpbin.org/forms/post")

# Option 1: wrap a BeautifulSoup <form> element (a bs4 Tag) by hand
form_element = response.soup.find("form")
form = Form(form_element)

# Option 2: let StatefulBrowser select the form; the selected form is then
# exposed as a Form object through browser.form
browser.select_form()
form = browser.form

Universal Field Setting

Auto-detect field types and set values appropriately.

def set(self, name, value, force=False):
    """
    Auto-detect the type of the field called ``name`` and set its value.
    
    Parameters:
    - name: Field name attribute
    - value: Value to set
    - force: If True and no field named ``name`` exists, create a new
             field instead of failing

    Raises:
    - InvalidFormMethod: if no field matches and force is False
    """

def __setitem__(self, name, value):
    """Set a form field with bracket notation: ``form[name] = value`` calls set()."""

Usage Example:

form = Form(form_element)

# Auto-detect and set any field type
form.set("username", "john_doe")
form.set("age", 25)
form.set("newsletter", True)

# Using bracket notation (recommended); each assignment calls set(),
# so these three lines repeat the calls above
form["username"] = "john_doe"
form["age"] = 25
form["newsletter"] = True

Input Field Handling

Handle text inputs, hidden fields, and other input elements.

def set_input(self, data):
    """
    Set the value of one or more input fields from a dictionary.
    
    Parameters:
    - data: Dict mapping field names to values. For file inputs of
            multipart forms the value must be an open file object
            (MechanicalSoup v1.3.0+), not a path.
    """

Usage Example:

form = Form(form_element)

# Set multiple input fields at once: one dict entry per field name
form.set_input({
    "first_name": "John",
    "last_name": "Doe", 
    "email": "john@example.com",
    "phone": "555-1234"
})

# Individual field setting (bracket notation goes through set())
form["password"] = "secret123"
form["confirm_password"] = "secret123"

Checkbox Handling

Manage checkbox selections with flexible options.

def set_checkbox(self, data, uncheck_other_boxes=True):
    """
    Check or uncheck checkboxes.
    
    Parameters:
    - data: Dict mapping checkbox names to boolean values (True checks,
            False unchecks), or to lists of values when several
            checkboxes share the same name
    - uncheck_other_boxes: Whether to uncheck the other checkboxes with
            the same name (default True)
    """

def check(self, data):
    """
    Backward-compatible method that sets checkboxes and radio buttons.

    Parameters:
    - data: Dict mapping field names to values, e.g. {"newsletter": True}
    """

def uncheck_all(self, name):
    """
    Uncheck every checkbox whose name attribute equals ``name``.
    
    Parameters:
    - name: Checkbox name attribute
    """

Usage Example:

form = Form(form_element)

# Check/uncheck single checkboxes: True checks the box, False unchecks it
form.set_checkbox({
    "subscribe_newsletter": True,
    "accept_terms": True,
    "marketing_emails": False
})

# Handle multi-value checkboxes (multiple checkboxes with same name):
# list the values of the boxes that should end up checked
form.set_checkbox({
    "interests": ["sports", "music", "travel"]
})

# Uncheck all checkboxes with specific name
form.uncheck_all("previous_selections")

# Legacy method (kept for backward compatibility; prefer set_checkbox())
form.check({"newsletter": True})

Radio Button Handling

Select radio button options.

def set_radio(self, data):
    """
    Select one option in each of the given radio button groups.
    
    Parameters:
    - data: Dict mapping radio group names to the value of the button
            to select
    """

Usage Example:

form = Form(form_element)

# Select radio button options: group name -> value of the button to select
form.set_radio({
    "gender": "female",
    "age_group": "25-34", 
    "preferred_contact": "email"
})

# Can also use universal set method
form["size"] = "large"  # Works for radio buttons too

Textarea Handling

Set content for textarea elements.

def set_textarea(self, data):
    """
    Set the text content of one or more textarea elements.
    
    Parameters:
    - data: Dict mapping textarea names to content strings
    """

Usage Example:

form = Form(form_element)

# Set textarea content; "\n" in the string yields a multi-line value
form.set_textarea({
    "comments": "This is a multi-line comment\nwith line breaks.",
    "bio": "Software developer with 5 years experience..."
})

# Using universal set method
form["message"] = "Hello, this works for textareas too!"

Select/Dropdown Handling

Handle select elements and option selection.

def set_select(self, data):
    """
    Select options in dropdown (select) elements.
    
    Parameters:
    - data: Dict mapping select names to the value to select, or to a
            list of values for multi-select elements
    """

Usage Example:

form = Form(form_element)

# Single select dropdown: select name -> option to choose
form.set_select({
    "country": "United States",
    "state": "California",
    "priority": "high"
})

# Multi-select elements: pass a list with every option to select
form.set_select({
    "languages": ["Python", "JavaScript", "Go"],
    "skills": ["web_dev", "databases", "apis"]
})

# Using universal method
form["department"] = "Engineering"

Form Control Management

Add new form controls and manage submit buttons.

def new_control(self, type, name, value, **kwargs):
    """
    Add a new input element to the form.
    
    Parameters:
    - type: Input type (text, hidden, submit, etc.)
    - name: Control name attribute
    - value: Control value attribute
    - **kwargs: Additional HTML attributes for the new element.
      NOTE(review): keyword names appear to be used verbatim as attribute
      names (no ``class_`` -> ``class`` translation), so reserved words
      need ``**{"class": "..."}`` - confirm against the implementation.
    """

def choose_submit(self, submit):
    """
    Select which submit button to use for form submission.
    
    Parameters:
    - submit: The submit element itself, or its name attribute
      (e.g. "save_draft" for the button with name="save_draft").
      NOTE(review): upstream documentation also describes None (use the
      first submit element) and False (send no submit element) - confirm.
    """

Usage Example:

form = Form(form_element)

# Add hidden fields
form.new_control("hidden", "csrf_token", "abc123")
form.new_control("hidden", "session_id", "xyz789")

# Add custom input with attributes. Keyword names become attribute names
# verbatim, so the reserved word "class" is passed through dict unpacking;
# class_="custom-input" would produce a literal class_="..." attribute.
form.new_control("text", "dynamic_field", "",
                 placeholder="Enter value",
                 **{"class": "custom-input"})

# Choose specific submit button
form.choose_submit("save_draft")  # Use button with name="save_draft"

Form Debugging

Debug and inspect form structure.

def print_summary(self):
    """Print a summary of the form's fields; useful to find out which fields need filling in."""

Usage Example:

form = Form(form_element)

# Print the form's fields
form.print_summary()

# Example output for a simple login form. NOTE(review): upstream prints
# the form's input, textarea, select and button tags as HTML, one per
# line, rather than a formatted report - confirm the exact rendering.
# <input name="username" type="text"/>
# <input name="password" type="password"/>
# <input name="remember_me" type="checkbox"/>
# <input name="submit" type="submit" value="Login"/>

Legacy Compatibility Methods

Deprecated methods maintained for backward compatibility.

def attach(self):
    """
    Deprecated: Use set_input() instead.

    NOTE(review): this stub lists no data argument although set_input()
    takes a dict - confirm the real signature before relying on it.
    """

def input(self):
    """
    Deprecated: Use set_input() instead.

    NOTE(review): this stub lists no data argument although set_input()
    takes a dict - confirm the real signature before relying on it.
    """

def textarea(self):
    """
    Deprecated: Use set_textarea() instead.

    NOTE(review): this stub lists no data argument although set_textarea()
    takes a dict - confirm the real signature before relying on it.
    """

Public Attributes

# Form instance attributes
form: bs4.element.Tag    # The underlying BeautifulSoup form element

Complete Form Handling Example

import mechanicalsoup

# Navigate to form page
browser = mechanicalsoup.StatefulBrowser()
browser.open("https://httpbin.org/forms/post")

# Select and examine form
browser.select_form('form[action="/post"]')
form = browser.form

# Print form structure for debugging
form.print_summary()

# Fill various field types
form["custname"] = "Alice Johnson"
form["custtel"] = "555-0123"

# Handle radio buttons and checkboxes. The names and values must exist in
# the page: httpbin's pizza form has a "size" radio group and "topping"
# checkboxes, and setting a field that is absent raises an exception.
form.set_radio({"size": "medium"})
form.set_checkbox({"topping": ["bacon", "cheese"]})

# Add dynamic fields
form.new_control("hidden", "source", "web_form")
form.new_control("hidden", "timestamp", "2024-01-15T10:30:00Z")

# Set textarea content
form.set_textarea({"comments": "Great service, very satisfied!"})

# This form has a single, unnamed submit button, so there is nothing to
# choose. On a form with several submit buttons, pick one by name first:
#     form.choose_submit("submit_order")

# Submit form
response = browser.submit_selected()
print("Form submitted:", response.json())

Form Validation and Error Handling

import mechanicalsoup

try:
    browser = mechanicalsoup.StatefulBrowser()
    browser.open("https://httpbin.org/forms/post")
    
    # Select form
    browser.select_form()
    
    # Attempt to set non-existent field; with force=False (the default)
    # this is reported through InvalidFormMethod
    try:
        browser.form.set("nonexistent_field", "value", force=False)
    except mechanicalsoup.InvalidFormMethod:
        print("Field not found in form")
    
    # Force setting (creates new field)
    browser.form.set("nonexistent_field", "value", force=True)
    
    # Submit and handle errors
    response = browser.submit_selected()
    
except mechanicalsoup.LinkNotFoundError as e:
    print(f"Navigation error: {e}")
except Exception as e:
    # Catch-all for the demo only; real code should catch narrower errors
    print(f"Unexpected error: {e}")

Secure File Upload Handling

File uploads in forms require special security considerations. Since MechanicalSoup v1.3.0, the value assigned to a multipart file input must be an open file object (an io.IOBase instance) rather than a file path; any other value raises ValueError. This is the mitigation for CVE-2023-34457.

def _assert_valid_file_upload(self, tag, value):
    """
    Internal validation for secure file uploads.

    Parameters:
    - tag: The form element about to receive the value (presumably the
           bs4 input tag - confirm against the caller)
    - value: The value about to be assigned to that element
    
    Raises ValueError if attempting to upload non-IOBase objects to 
    multipart file inputs (security mitigation for CVE-2023-34457).
    """

Secure File Upload Example:

import io

import mechanicalsoup
# is_multipart_file_upload is defined in mechanicalsoup.utils and is not
# re-exported by the top-level package, so import it from the submodule.
from mechanicalsoup.utils import is_multipart_file_upload

def secure_file_upload_example():
    """
    Demonstrate secure file upload handling.

    This is a template: the open()/select_form() calls are commented out
    and must be pointed at a real upload page before the function can run.
    """
    browser = mechanicalsoup.StatefulBrowser()
    
    # Navigate to a form with file upload
    # browser.open("https://example.com/upload-form")
    # browser.select_form()
    
    # SECURE: Use open file objects directly
    with open("/path/to/document.pdf", "rb") as file_obj:
        # This is the secure way (v1.3.0+)
        browser["document"] = file_obj
        
        # Fill other form fields
        browser["description"] = "Important document"
        browser["category"] = "reports"
        
        # Submit while file is still open
        response = browser.submit_selected()
        print(f"Upload successful: {response.status_code}")
    
    # Alternative: Create in-memory file
    file_content = b"Hello, this is file content"
    file_obj = io.BytesIO(file_content)
    file_obj.name = "test.txt"  # Optional: set filename
    
    # NOTE(review): submit_selected() above loads the response page, so the
    # previously selected form is presumably no longer selected here - open
    # the upload page and select the form again before filling it in:
    # browser.open("https://example.com/upload-form")
    # browser.select_form()
    browser["text_file"] = file_obj
    response = browser.submit_selected()

# INSECURE: These patterns will raise ValueError in v1.3.0+
def insecure_patterns_to_avoid():
    """
    Show values that are rejected when assigned to a file upload field.

    Neither value below is an open file object, so each assignment is
    expected to raise ValueError (CVE-2023-34457 mitigation); the error
    is caught and printed.
    """
    browser = mechanicalsoup.StatefulBrowser()
    # browser.open("https://example.com/upload-form") 
    # browser.select_form()

    rejected_values = (
        "/path/to/file.pdf",  # String path - INSECURE
        b"raw bytes",  # Bytes - INSECURE
    )
    for rejected in rejected_values:
        try:
            browser["document"] = rejected
        except ValueError as e:
            print(f"Security error: {e}")

# Check if form supports file uploads
def check_file_upload_support():
    """
    Report, for each file input of the selected form, whether
    is_multipart_file_upload() accepts it.
    """
    browser = mechanicalsoup.StatefulBrowser()
    # browser.open("https://example.com/form")
    # browser.select_form()

    # browser.form is the Form wrapper; its .form attribute is the bs4 tag
    form_element = browser.form.form

    for file_input in form_element.find_all("input", {"type": "file"}):
        field_name = file_input.get("name")
        if not is_multipart_file_upload(form_element, file_input):
            print(f"Warning: Field '{field_name}' may not support file upload properly")
            continue
        print(f"Field '{field_name}' supports secure file upload")

# Usage examples
# Run the examples only when executed as a script, not on import.
# insecure_patterns_to_avoid() is defined above but is not called here.
if __name__ == "__main__":
    secure_file_upload_example()
    check_file_upload_support()

Key Security Notes:

  • Always use open file objects (IOBase instances) for file uploads
  • Never pass file paths as strings - since v1.3.0 this raises ValueError, and accepting paths is the behavior that CVE-2023-34457 was filed against
  • Use context managers (with open(...)) to ensure files are properly closed
  • Test multipart support with is_multipart_file_upload() before uploading
  • Handle ValueError exceptions for invalid file upload attempts

Install with Tessl CLI

npx tessl i tessl/pypi-mechanicalsoup

docs

browser.md

forms.md

index.md

navigation.md

utilities.md

tile.json