A Python library for automating interaction with websites, providing web scraping and form submission capabilities
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Low-level browser functionality for HTTP requests with automatic BeautifulSoup parsing. The Browser class provides direct request/response handling with session management and is recommended for applications requiring fine-grained control over HTTP interactions.
Create a browser instance with optional session, parsing, and adapter configuration.
class Browser:
def __init__(self, session=None, soup_config={'features': 'lxml'},
requests_adapters=None, raise_on_404=False, user_agent=None):
"""
Create a Browser instance.
Parameters:
- session: Optional requests.Session instance
- soup_config: BeautifulSoup configuration dict
- requests_adapters: Requests adapter configuration
- raise_on_404: If True, raise LinkNotFoundError on 404 errors
- user_agent: Custom User-Agent string
"""Usage Example:
import mechanicalsoup
import requests
# Basic browser
browser = mechanicalsoup.Browser()
# Browser with custom session
session = requests.Session()
browser = mechanicalsoup.Browser(session=session)
# Browser with custom BeautifulSoup parser
browser = mechanicalsoup.Browser(soup_config={'features': 'html.parser'})
# Browser that raises on 404 errors
browser = mechanicalsoup.Browser(raise_on_404=True)

Standard HTTP methods with automatic BeautifulSoup parsing of HTML responses.
def request(self, *args, **kwargs):
"""Low-level request method, forwards to session.request()"""
def get(self, *args, **kwargs):
"""HTTP GET request with soup parsing"""
def post(self, *args, **kwargs):
"""HTTP POST request with soup parsing"""
def put(self, *args, **kwargs):
"""HTTP PUT request with soup parsing"""Usage Example:
browser = mechanicalsoup.Browser()
# GET request
response = browser.get("https://httpbin.org/get")
print(response.soup.title.string)
# POST request with data
response = browser.post("https://httpbin.org/post",
data={"key": "value"})
# PUT request with data
response = browser.put("https://httpbin.org/put",
json={"updated_key": "updated_value"})
# Request with headers
response = browser.get("https://httpbin.org/headers",
headers={"Custom-Header": "value"})Submit HTML forms with automatic data extraction and encoding.
def submit(self, form, url=None, **kwargs):
"""
Submit a form object.
Parameters:
- form: Form instance to submit
- url: Optional URL override for form action
- **kwargs: Additional request parameters
Returns:
requests.Response with soup attribute
"""Usage Example:
from mechanicalsoup import Browser, Form
browser = Browser()
response = browser.get("https://httpbin.org/forms/post")
# Create and fill form
form = Form(response.soup.find("form"))
form["custname"] = "John Doe"
# Submit form
result = browser.submit(form)
print(result.soup)

Manage cookies and session state for authenticated or persistent interactions.
def set_cookiejar(self, cookiejar):
"""Replace the current cookiejar in the requests session"""
def get_cookiejar(self):
"""Get the current cookiejar from the requests session"""Usage Example:
import mechanicalsoup
from http.cookiejar import CookieJar
browser = mechanicalsoup.Browser()
# Get current cookies
cookies = browser.get_cookiejar()
# Set new cookie jar
new_jar = CookieJar()
browser.set_cookiejar(new_jar)

Set and manage the User-Agent header for requests.
def set_user_agent(self, user_agent):
"""
Set the User-Agent header for requests.
Parameters:
- user_agent: String to use as User-Agent, or None for default
"""Usage Example:
browser = mechanicalsoup.Browser()
# Set custom user agent
browser.set_user_agent("MyBot/1.0 (Contact: admin@example.com)")
# Reset to default
browser.set_user_agent(None)

Tools for debugging and development workflow.
def launch_browser(self, soup):
"""
Launch external browser with page content for debugging.
Parameters:
- soup: BeautifulSoup object to display
"""Clean up browser resources and close connections.
def close(self):
"""Close the session and clear cookies"""Usage Example:
browser = mechanicalsoup.Browser()
try:
response = browser.get("https://example.com")
# Use response...
finally:
browser.close()

Browser supports the context manager protocol for automatic resource cleanup.
def __enter__(self):
"""Enter context manager, returns self"""
def __exit__(self, *args):
"""Exit context manager, calls close() automatically"""Usage Example:
# Recommended approach using context manager
with mechanicalsoup.Browser() as browser:
response = browser.get("https://example.com")
# Process response...
response2 = browser.post("https://example.com/api", data={"key": "value"})
# Browser automatically closed when exiting with-block
# For long-running applications
with mechanicalsoup.Browser(user_agent="MyApp/1.0") as browser:
for url in urls:
try:
response = browser.get(url)
process_page(response.soup)
except Exception as e:
print(f"Error processing {url}: {e}")Helper methods for response processing and form data extraction.
@staticmethod
def add_soup(response, soup_config):
"""
Attach a BeautifulSoup object to a requests response.
Parameters:
- response: requests.Response object
- soup_config: BeautifulSoup configuration dict
"""
@staticmethod
def get_request_kwargs(form, url=None, **kwargs):
"""
Extract form data for request submission.
Parameters:
- form: Form instance
- url: Optional URL override
- **kwargs: Additional parameters
Returns:
Dict with request parameters
"""# Browser instance attributes
session: requests.Session # The underlying requests session
soup_config: Dict[str, Any] # BeautifulSoup configuration
raise_on_404: bool # Whether to raise on 404 errors

The Browser class can raise LinkNotFoundError when raise_on_404=True and a 404 error occurs:
import mechanicalsoup
browser = mechanicalsoup.Browser(raise_on_404=True)
try:
response = browser.get("https://httpbin.org/status/404")
except mechanicalsoup.LinkNotFoundError:
print("Page not found!")Install with Tessl CLI
npx tessl i tessl/pypi-mechanicalsoup