When they're not builtins, they're boltons.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Comprehensive text manipulation including case conversion, slugification, text formatting, HTML processing, ANSI handling, compression, and advanced string operations with internationalization support. Provides utilities for common text processing tasks with robust encoding and formatting capabilities.
Convert between different string case formats.
def camel2under(camel_string):
"""
Convert CamelCase to under_score.
Parameters:
- camel_string (str): CamelCase string to convert
Returns:
str: Converted under_score string
"""
def under2camel(under_string):
"""
Convert under_score to CamelCase.
Parameters:
- under_string (str): under_score string to convert
Returns:
str: Converted CamelCase string
"""

Convert text to URL-safe slugs and identifiers.
def slugify(text, delim='_', lower=True, ascii=False):
"""
Convert text to URL-safe slug.
Parameters:
- text (str): Text to slugify
- delim (str): Delimiter character (default: '_')
- lower (bool): Convert to lowercase (default: True)
- ascii (bool): Force ASCII output (default: False)
Returns:
str: URL-safe slug
"""
def a10n(string):
"""
Create internationalization-style abbreviation (a11y, i18n, etc.).
Parameters:
- string (str): String to abbreviate
Returns:
str: Abbreviated form (first + count + last)
"""

Advanced text processing and formatting utilities.
def split_punct_ws(text):
"""
Split text on punctuation and whitespace.
Parameters:
- text (str): Text to split
Returns:
list: List of text segments
"""
def unit_len(sized_iterable, unit_noun='item'):
"""
Format count with unit noun.
Parameters:
- sized_iterable: Iterable with __len__
- unit_noun (str): Singular noun for the unit
Returns:
str: Formatted count with proper pluralization
"""
def ordinalize(number, ext_only=False):
"""
Convert number to ordinal (1st, 2nd, etc.).
Parameters:
- number (int): Number to convert
- ext_only (bool): Return only the suffix (default: False)
Returns:
str: Ordinal number or suffix
"""
def cardinalize(unit_noun, count):
"""
Pluralize unit noun based on count.
Parameters:
- unit_noun (str): Singular noun
- count (int): Count to determine pluralization
Returns:
str: Properly pluralized noun
"""
def singularize(word):
"""
Convert plural word to singular form.
Parameters:
- word (str): Plural word
Returns:
str: Singular form
"""
def pluralize(word):
"""
Convert singular word to plural form.
Parameters:
- word (str): Singular word
Returns:
str: Plural form
"""

Extract and analyze text content.
def find_hashtags(string):
"""
Extract hashtags from text.
Parameters:
- string (str): Text containing hashtags
Returns:
list: List of hashtag strings (without the leading #)
"""
def is_uuid(string):
"""
Check if string is valid UUID format.
Parameters:
- string (str): String to check
Returns:
bool: True if valid UUID format
"""
def is_ascii(text):
"""
Check if text contains only ASCII characters.
Parameters:
- text (str): Text to check
Returns:
bool: True if text is ASCII-only
"""

Clean and normalize text content.
def strip_ansi(text):
"""
Remove ANSI escape sequences from text.
Parameters:
- text (str): Text with ANSI sequences
Returns:
str: Text with ANSI sequences removed
"""
def asciify(text, ignore=False):
"""
Convert text to ASCII by removing diacritics.
Parameters:
- text (str): Text to convert
- ignore (bool): Ignore non-convertible characters
Returns:
str: ASCII-compatible text
"""
def unwrap_text(text, **kwargs):
"""
Unwrap text by removing line breaks.
Parameters:
- text (str): Text to unwrap
Returns:
str: Text with line breaks removed appropriately
"""
def indent(text, prefix):
"""
Indent text lines with prefix.
Parameters:
- text (str): Text to indent
- prefix (str): Prefix to add to each line
Returns:
str: Indented text
"""

Extract and process HTML content.
def html2text(html_text):
"""
Extract plain text from HTML string.
Parameters:
- html_text (str): HTML content
Returns:
str: Plain text content
"""
class HTMLTextExtractor(HTMLParser):
"""Extract plain text from HTML."""
def __init__(self): ...
def handle_data(self, data): ...
def get_text(self): ...

Format data for human consumption.
def bytes2human(nbytes, ndigits=0):
"""
Convert bytes to human readable format.
Parameters:
- nbytes (int): Number of bytes
- ndigits (int): Number of decimal places
Returns:
str: Human readable size (e.g., "1.5M")
"""

Text compression and decompression utilities.
def gunzip_bytes(data):
"""
Decompress gzip bytes.
Parameters:
- data (bytes): Gzipped data
Returns:
bytes: Decompressed data
"""
def gzip_bytes(data):
"""
Compress data to gzip bytes.
Parameters:
- data (bytes): Data to compress
Returns:
bytes: Gzipped data
"""

Efficient multiple string replacement operations.
def multi_replace(input_string, sub_map, **kwargs):
"""
Efficient multiple string replacement.
Parameters:
- input_string (str): String to process
- sub_map (dict): Mapping of old -> new strings
Returns:
str: String with all replacements made
"""
class MultiReplace:
"""Efficient multiple string replacement."""
def __init__(self, sub_map): ...
def __call__(self, input_string): ...

Escape and format shell command arguments.
def escape_shell_args(args, sep=' ', style=None):
"""
Escape shell command arguments.
Parameters:
- args (list): List of arguments
- sep (str): Separator between arguments
- style (str): Shell style ('sh', 'cmd', etc.)
Returns:
str: Escaped shell command string
"""
def args2sh(args, sep=' '):
"""
Convert args to shell-escaped string.
Parameters:
- args (list): List of arguments
- sep (str): Separator between arguments
Returns:
str: Shell-escaped command string
"""
def args2cmd(args, sep=' '):
"""
Convert args to cmd.exe-escaped string.
Parameters:
- args (list): List of arguments
- sep (str): Separator between arguments
Returns:
str: CMD-escaped command string
"""

Parse and format integer ranges and lists.
def parse_int_list(range_string, **kwargs):
"""
Parse integer ranges from string.
Parameters:
- range_string (str): String like "1-5,7,9-12"
Returns:
list: List of integers
"""
def format_int_list(int_list, **kwargs):
"""
Format integer list as range string.
Parameters:
- int_list (list): List of integers
Returns:
str: Formatted range string
"""
def complement_int_list(range_string, **kwargs):
"""
Get complement of integer ranges.
Parameters:
- range_string (str): Range string to complement
Returns:
str: Complement range string
"""
def int_ranges_from_int_list(int_list):
"""
Convert integer list to ranges.
Parameters:
- int_list (list): List of integers
Returns:
list: List of (start, end) tuples
"""

Process large text files efficiently.
def iter_splitlines(text):
"""
Memory-efficient line iteration.
Parameters:
- text (str): Text to split into lines
Yields:
str: Each line
"""

from boltons.strutils import (
slugify, camel2under, under2camel, bytes2human,
strip_ansi, html2text, multi_replace, find_hashtags
)
# Create URL-friendly slugs
title = "Hello, World! This is a test."
slug = slugify(title)
print(slug) # "hello_world_this_is_a_test"
# Case conversion
camel = "myVariableName"
under = camel2under(camel)
print(under) # "my_variable_name"
back_to_camel = under2camel(under)
print(back_to_camel) # "MyVariableName"
# Human-readable byte sizes
size = bytes2human(1536, ndigits=1)
print(size) # "1.5K"
# Clean ANSI escape sequences
ansi_text = "\033[31mRed text\033[0m"
clean = strip_ansi(ansi_text)
print(clean) # "Red text"
# Extract text from HTML
html = "<p>Hello <b>world</b>!</p>"
text = html2text(html)
print(text) # "Hello world!"
# Multiple string replacements
text = "Hello world, hello universe"
replacements = {"hello": "hi", "world": "earth"}
result = multi_replace(text, replacements)
print(result) # "Hello earth, hi universe" (matching is case-sensitive by default)
# Find hashtags in text
social_text = "Check out #python and #boltons!"
tags = find_hashtags(social_text)
print(tags) # ["python", "boltons"]

from boltons.strutils import (
ordinalize, cardinalize, pluralize, singularize,
parse_int_list, format_int_list, asciify
)
# Number formatting
print(ordinalize(1)) # "1st"
print(ordinalize(22)) # "22nd"
print(ordinalize(103)) # "103rd"
# Pluralization
print(cardinalize("item", 1)) # "item"
print(cardinalize("item", 5)) # "items"
print(pluralize("child")) # "children"
print(singularize("children")) # "child"
# Integer range processing
ranges = "1-5,7,9-12"
numbers = parse_int_list(ranges)
print(numbers) # [1, 2, 3, 4, 5, 7, 9, 10, 11, 12]
formatted = format_int_list([1, 2, 3, 5, 6, 8])
print(formatted) # "1-3,5-6,8"
# Text normalization
accented = "café résumé naïve"
ascii_text = asciify(accented)
print(ascii_text) # b'cafe resume naive'

# Character mapping for removing diacritics
class DeaccenterDict(dict):
"""Dictionary for character deaccenting mappings."""
pass
# Regular expressions
HASHTAG_RE: re.Pattern # Pattern for matching hashtags
ANSI_SEQUENCES: re.Pattern # Pattern for ANSI escape sequences
# Character mappings
DEACCENT_MAP: dict # Mapping for removing diacritical marks

Install with Tessl CLI
npx tessl i tessl/pypi-boltons