URL manipulation made simple.
65
Furl provides a comprehensive set of utility functions for URL validation, encoding, parsing, and manipulation operations. These functions can be used independently or as part of the main furl functionality.
Enhanced URL parsing functions that extend Python's standard urllib functionality.
def urlsplit(url):
"""
Split URL into components with enhanced parsing.
Args:
url (str): URL string to split
Returns:
SplitResult: Named tuple with URL components
"""
def urljoin(base, url):
"""
Join base URL with relative URL using enhanced logic.
Args:
base (str): Base URL string
url (str): URL string to join (can be relative or absolute)
Returns:
str: Joined URL string
"""

Usage:
from furl import urlsplit, urljoin
# Enhanced URL splitting
result = urlsplit('https://user:pass@example.com:8080/path?query=value#fragment')
print(result.scheme) # 'https'
print(result.netloc) # 'user:pass@example.com:8080'
print(result.path) # '/path'
print(result.query) # 'query=value'
print(result.fragment) # 'fragment'
# URL joining
base = 'https://example.com/api/v1/'
endpoint = 'users/123'
full_url = urljoin(base, endpoint)
print(full_url) # 'https://example.com/api/v1/users/123'
# Join with absolute URL (replaces base)
absolute = 'https://different.com/other'
result = urljoin(base, absolute)
print(result) # 'https://different.com/other'

Functions for extracting, validating, and manipulating URL schemes.
def get_scheme(url):
"""
Extract scheme from URL string.
Args:
url (str): URL string
Returns:
str | None: Scheme string or None if no scheme
"""
def strip_scheme(url):
"""
Remove scheme from URL string.
Args:
url (str): URL string
Returns:
str: URL string without scheme
"""
def set_scheme(url, scheme):
"""
Set or replace scheme in URL string.
Args:
url (str): URL string
scheme (str): New scheme to set
Returns:
str: URL string with new scheme
"""
def is_valid_scheme(scheme):
"""
Validate URL scheme format.
Args:
scheme (str): Scheme string to validate
Returns:
bool: True if scheme is valid
"""
def has_netloc(url):
"""
Check if URL has network location component.
Args:
url (str): URL string to check
Returns:
bool: True if URL has netloc
"""

Usage:
from furl import get_scheme, strip_scheme, set_scheme, is_valid_scheme, has_netloc
url = 'https://example.com/path'
# Extract scheme
scheme = get_scheme(url)
print(scheme) # 'https'
# Remove scheme
no_scheme = strip_scheme(url)
print(no_scheme) # '//example.com/path'
# Set new scheme
ftp_url = set_scheme(url, 'ftp')
print(ftp_url) # 'ftp://example.com/path'
# Validate scheme
print(is_valid_scheme('https')) # True
print(is_valid_scheme('1https')) # False (a scheme must start with a letter)
# Check for network location
print(has_netloc('https://example.com/path')) # True
print(has_netloc('/just/a/path')) # False

Functions for manipulating URL path segments and components.
def join_path_segments(*args):
"""
Join multiple path segments into a single path.
Args:
*args: Path segments to join (strings)
Returns:
str: Joined path string
"""
def remove_path_segments(segments, remove):
"""
Remove specified segments from path segments list.
Args:
segments (list): List of path segments
remove (list|str): Segments to remove
Returns:
list: Updated segments list
"""
def quacks_like_a_path_with_segments(obj):
"""
Duck typing check for path-like objects with segments.
Args:
obj: Object to check
Returns:
bool: True if object behaves like a path with segments
"""

Usage:
from furl import join_path_segments, remove_path_segments, quacks_like_a_path_with_segments
# Join path segments
path = join_path_segments('api', 'v1', 'users', '123')
print(path) # 'api/v1/users/123'
# Remove segments from list
segments = ['api', 'v1', 'users', '123', 'profile']
updated = remove_path_segments(segments, ['v1', 'profile'])
print(updated) # ['api', 'users', '123']
# Duck typing check
from furl import Path
path_obj = Path('/api/v1/users')
print(quacks_like_a_path_with_segments(path_obj)) # True
print(quacks_like_a_path_with_segments("string")) # False

Functions for validating various URL components and formats.
def is_valid_host(hostname):
"""
Validate hostname format.
Args:
hostname (str): Hostname to validate
Returns:
bool: True if hostname is valid
"""
def is_valid_port(port):
"""
Validate port number.
Args:
port (int|str): Port number to validate
Returns:
bool: True if port is valid (1-65535)
"""
def is_valid_encoded_path_segment(segment):
"""
Validate percent-encoded path segment.
Args:
segment (str): Path segment to validate
Returns:
bool: True if segment is properly encoded
"""
def is_valid_encoded_query_key(key):
"""
Validate percent-encoded query parameter key.
Args:
key (str): Query key to validate
Returns:
bool: True if key is properly encoded
"""
def is_valid_encoded_query_value(value):
"""
Validate percent-encoded query parameter value.
Args:
value (str): Query value to validate
Returns:
bool: True if value is properly encoded
"""

Usage:
from furl import (is_valid_host, is_valid_port, is_valid_encoded_path_segment,
is_valid_encoded_query_key, is_valid_encoded_query_value)
# Validate hostname
print(is_valid_host('example.com')) # True
print(is_valid_host('sub.example.com')) # True
print(is_valid_host('192.168.1.1')) # True
print(is_valid_host('invalid..host')) # False
# Validate port
print(is_valid_port(80)) # True
print(is_valid_port('443')) # True
print(is_valid_port(0)) # False
print(is_valid_port(99999)) # False
# Validate encoded components
print(is_valid_encoded_path_segment('users')) # True
print(is_valid_encoded_path_segment('user%20name')) # True
print(is_valid_encoded_path_segment('user name')) # False (not encoded)
print(is_valid_encoded_query_key('search_term')) # True
print(is_valid_encoded_query_key('search%20term')) # True
print(is_valid_encoded_query_value('hello%20world')) # True
print(is_valid_encoded_query_value('hello world')) # False

Functions for handling character encoding and IDNA (Internationalized Domain Names).
def utf8(obj, default=None):
"""
Convert object to UTF-8 encoded string.
Args:
obj: Object to convert
default: Default value if conversion fails
Returns:
str: UTF-8 encoded string
"""
def idna_encode(hostname):
"""
Encode hostname using IDNA (Internationalized Domain Names).
Args:
hostname (str): Hostname to encode
Returns:
str: IDNA encoded hostname
"""
def idna_decode(hostname):
"""
Decode IDNA encoded hostname.
Args:
hostname (str): IDNA encoded hostname
Returns:
str: Decoded hostname
"""
def attemptstr(obj):
"""
Attempt to convert object to string.
Args:
obj: Object to convert
Returns:
str: String representation or original object
"""
def non_string_iterable(obj):
"""
Check if object is iterable but not a string.
Args:
obj: Object to check
Returns:
bool: True if iterable but not string
"""

Usage:
from furl import utf8, idna_encode, idna_decode, attemptstr, non_string_iterable
# UTF-8 encoding
text = utf8('Hello 世界')
print(text) # Properly encoded UTF-8 string
# IDNA encoding for international domain names
international_domain = 'тест.example'
encoded = idna_encode(international_domain)
print(encoded) # 'xn--e1aybc.example'
# IDNA decoding
decoded = idna_decode(encoded)
print(decoded) # 'тест.example'
# String conversion
print(attemptstr(123)) # '123'
print(attemptstr(['a'])) # "['a']"
# Check for non-string iterables
print(non_string_iterable(['a', 'b'])) # True
print(non_string_iterable('string')) # False
print(non_string_iterable(123)) # False

Additional utility functions for common operations.
def lget(lst, index, default=None):
"""
Safe list index access with default value.
Args:
lst (list): List to access
index (int): Index to access
default: Default value if index out of bounds
Returns:
Any: List item or default value
"""
def static_vars(**kwargs):
"""
Decorator to add static variables to functions.
Args:
**kwargs: Static variables to add
Returns:
function: Decorated function with static variables
"""
def create_quote_fn(safe_charset, quote_plus):
"""
Create custom URL quoting function.
Args:
safe_charset (str): Characters considered safe (not to quote)
quote_plus (bool): Use '+' for spaces instead of '%20'
Returns:
function: Custom quoting function
"""

Usage:
from furl import lget
# Safe list access
items = ['a', 'b', 'c']
print(lget(items, 1)) # 'b'
print(lget(items, 10)) # None
print(lget(items, 10, 'default')) # 'default'
# Safe access with empty list
empty = []
print(lget(empty, 0, 'fallback')) # 'fallback'

Dictionary mapping URL schemes to their default ports.
DEFAULT_PORTS = {
'http': 80,
'https': 443,
'ftp': 21,
'ssh': 22,
'telnet': 23,
# ... and 34 more common protocols
}

Usage:
from furl import DEFAULT_PORTS
# Check default port for scheme
print(DEFAULT_PORTS.get('https')) # 443
print(DEFAULT_PORTS.get('ftp')) # 21
print(DEFAULT_PORTS.get('unknown')) # None
# List all supported schemes
print(list(DEFAULT_PORTS.keys()))

Regular expression patterns used for validation.
PERCENT_REGEX = r'\%[a-fA-F\d][a-fA-F\d]' # Pattern for percent-encoded chars
INVALID_HOST_CHARS = '!@#$%^&\'"*()+=:;/' # Invalid characters in hostnames

Additional functions for specialized operations.
def static_vars(**kwargs):
"""
Decorator to add static variables to functions.
Args:
**kwargs: Static variables to add
Returns:
function: Decorated function with static variables
"""
def create_quote_fn(safe_charset, quote_plus):
"""
Create custom URL quoting function.
Args:
safe_charset (str): Characters considered safe (not to quote)
quote_plus (bool): Use '+' for spaces instead of '%20'
Returns:
function: Custom quoting function
"""

Utility functions handle various error conditions gracefully:
from furl import is_valid_host, utf8, idna_encode
# Handle invalid input gracefully
print(is_valid_host(None)) # False (doesn't crash)
print(utf8(None, 'fallback')) # 'fallback'
# Handle encoding errors
try:
    result = idna_encode('invalid..domain')
except Exception as e:
    print(f"Encoding error: {e}")

Install with Tessl CLI
npx tessl i tessl/pypi-furl

evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10