A Python slugify application that handles Unicode text conversion to URL-friendly slugs
npx @tessl/cli install tessl/pypi-python-slugify@8.0.0

A comprehensive Python library for converting Unicode text strings into URL-friendly slugs. Python Slugify handles complex Unicode characters from various languages by transliterating them to ASCII equivalents, while offering extensive customization options including custom separators, stopword filtering, length limits, regex patterns, and character replacements.
pip install python-slugify
pip install python-slugify[unidecode]  # for advanced Unicode handling
from slugify import slugify
Additional utilities and special character mappings:
from slugify import slugify, smart_truncate
from slugify import PRE_TRANSLATIONS, CYRILLIC, GERMAN, GREEK
Version and metadata information:
from slugify import __version__, __title__, __author__, __description__
Regex patterns and constants:
from slugify import DEFAULT_SEPARATOR
from slugify import CHAR_ENTITY_PATTERN, DECIMAL_PATTERN, HEX_PATTERN
from slugify import slugify
# Basic text slugification
text = "This is a test ---"
result = slugify(text)
print(result) # "this-is-a-test"
# Unicode text handling
text = '影師嗎'
result = slugify(text)
print(result) # "ying-shi-ma"
# Preserve Unicode characters
text = '影師嗎'
result = slugify(text, allow_unicode=True)
print(result) # "影師嗎"
# Custom separator and length limits
text = 'C\'est déjà l\'été.'
result = slugify(text, separator='_', max_length=15)
print(result) # "c_est_deja_l_et"
# Using replacement rules
text = "50% off | great deal"
result = slugify(text, replacements=[['%', 'percent'], ['|', 'or']])
print(result) # "50percent-off-or-great-deal" (replacements are plain substring substitutions; no separator is inserted after "50")
The main function for converting text to URL-friendly slugs with comprehensive Unicode support and customization options.
def slugify(
text: str,
entities: bool = True,
decimal: bool = True,
hexadecimal: bool = True,
max_length: int = 0,
word_boundary: bool = False,
separator: str = "-",
save_order: bool = False,
stopwords: Iterable[str] = (),
regex_pattern: re.Pattern[str] | str | None = None,
lowercase: bool = True,
replacements: Iterable[Iterable[str]] = (),
allow_unicode: bool = False,
) -> str:
"""
Convert text into a URL-friendly slug.
Parameters:
- text (str): Input text to slugify
- entities (bool): Convert HTML entities to unicode (default: True)
- decimal (bool): Convert HTML decimal entities to unicode (default: True)
- hexadecimal (bool): Convert HTML hexadecimal entities to unicode (default: True)
- max_length (int): Maximum output length, 0 for no limit (default: 0)
- word_boundary (bool): Truncate to complete words (default: False)
- separator (str): Separator between words (default: "-")
- save_order (bool): Preserve word order when truncating (default: False)
- stopwords (Iterable[str]): Words to exclude from output (default: ())
- regex_pattern (re.Pattern[str] | str | None): Custom regex for disallowed characters (default: None)
- lowercase (bool): Convert to lowercase (default: True)
- replacements (Iterable[Iterable[str]]): Custom replacement rules (default: ())
- allow_unicode (bool): Allow Unicode characters in output (default: False)
Returns:
str: URL-friendly slug
"""
from slugify import slugify
# HTML entity handling
text = "foo & bar"
result = slugify(text) # "foo-bar"
# Stopword filtering
text = "The quick brown fox"
result = slugify(text, stopwords=['the', 'a', 'an']) # "quick-brown-fox"
# Custom regex pattern
import re
text = "Hello World 123"
pattern = re.compile(r'[^a-z]+')
result = slugify(text, regex_pattern=pattern) # "hello-world"
# Length limits with word boundaries
text = "This is a very long sentence"
result = slugify(text, max_length=15, word_boundary=True) # "this-is-a-very"
# Multiple replacement rules
text = "Price: $50 | 20% off"
replacements = [['$', 'dollar'], ['%', 'percent'], ['|', 'and']]
result = slugify(text, replacements=replacements) # "price-dollar50-and-20percent-off"
Intelligent string truncation with word boundary preservation and order control.
def smart_truncate(
string: str,
max_length: int = 0,
word_boundary: bool = False,
separator: str = " ",
save_order: bool = False,
) -> str:
"""
Intelligently truncate strings while preserving word boundaries.
Parameters:
- string (str): String to truncate
- max_length (int): Maximum length, 0 for no truncation (default: 0)
- word_boundary (bool): Respect word boundaries (default: False)
- separator (str): Word separator (default: " ")
- save_order (bool): Maintain original word order (default: False)
Returns:
str: Truncated string
"""
from slugify import smart_truncate
# Basic truncation
text = "This is a long sentence"
result = smart_truncate(text, max_length=10) # "This is a" (the trailing separator is stripped after slicing)
# Word boundary preservation
text = "This is a long sentence"
result = smart_truncate(text, max_length=15, word_boundary=True) # "This is a long"
# Custom separator
text = "word1-word2-word3-word4"
result = smart_truncate(text, max_length=15, word_boundary=True, separator="-") # "word1-word2"
Pre-defined character translation mappings for various languages, useful for custom transliteration workflows.
# Character mapping lists
CYRILLIC: list[tuple[str, str]]
GERMAN: list[tuple[str, str]]
GREEK: list[tuple[str, str]]
PRE_TRANSLATIONS: list[tuple[str, str]]
def add_uppercase_char(char_list: list[tuple[str, str]]) -> list[tuple[str, str]]:
"""
Add uppercase variants to character replacement list.
Parameters:
- char_list (list[tuple[str, str]]): List of character replacement tuples
Returns:
list[tuple[str, str]]: Enhanced list with uppercase variants
"""
from slugify import CYRILLIC, GERMAN, GREEK, PRE_TRANSLATIONS
# Cyrillic mappings: ё->e, я->ya, х->h, у->y, щ->sch, ю->u (with uppercase variants)
print(CYRILLIC[:3]) # [('Ё', 'E'), ('ё', 'e'), ('Я', 'Ya'), ...]
# German umlaut mappings: ä->ae, ö->oe, ü->ue (with uppercase variants)
print(GERMAN[:3]) # [('Ä', 'Ae'), ('ä', 'ae'), ('Ö', 'Oe'), ...]
# Greek mappings: χ->ch, Ξ->X, ϒ->Y, υ->y, etc. (with uppercase variants)
print(GREEK[:3]) # [('Χ', 'Ch'), ('χ', 'ch'), ('Ξ', 'X'), ...]
# Combined mappings from all languages
print(len(PRE_TRANSLATIONS)) # Total count of all mappings
Python Slugify provides a command-line interface for text slugification with full parameter support.
def main(argv: list[str] | None = None):
"""
Command-line entry point for slugification.
Parameters:
- argv (list[str] | None): Command line arguments (default: None uses sys.argv)
"""
# Basic usage
slugify "Hello World" # Output: hello-world
# From stdin
echo "Hello World" | slugify --stdin
# With options
slugify "Hello World" --separator="_" --max-length=8 # Output: hello_wo
# Custom replacements
slugify "Price: $50" --replacements "\$->dollar" # Output: price-dollar50
# Custom regex pattern
slugify "Keep_underscores" --regex-pattern "[^-a-z0-9_]+" # Output: keep_underscores
# Allow unicode
slugify "影師嗎" --allow-unicode # Output: 影師嗎
# Complex combination
slugify "The ÜBER café costs 50%" --stopwords "the" --replacements "Ü->UE" "%->percent" --max-length=20
# Output: ueber-cafe-costs-50p  (hard cut at 20 characters; add --word-boundary to get ueber-cafe-costs)
# Help
slugify --help
All slugify() function parameters are available as command-line options:
--separator: Custom separator (default: "-")
--max-length: Maximum output length
--word-boundary: Truncate to complete words
--save-order: Preserve word order when truncating
--stopwords: Space-separated list of words to exclude
--regex-pattern: Custom regex for disallowed characters
--no-lowercase: Disable lowercase conversion
--replacements: Replacement rules in format "old->new"
--allow-unicode: Allow Unicode characters
--no-entities: Disable HTML entity conversion
--no-decimal: Disable HTML decimal conversion
--no-hexadecimal: Disable HTML hexadecimal conversion
--stdin: Read input from stdin
# Default separator constant
DEFAULT_SEPARATOR: str = "-"
# Regex patterns for text processing
CHAR_ENTITY_PATTERN: re.Pattern[str] # HTML character entities
DECIMAL_PATTERN: re.Pattern[str] # HTML decimal references
HEX_PATTERN: re.Pattern[str] # HTML hexadecimal references
QUOTE_PATTERN: re.Pattern[str] # Quote characters
DISALLOWED_CHARS_PATTERN: re.Pattern[str] # Disallowed ASCII characters
DISALLOWED_UNICODE_CHARS_PATTERN: re.Pattern[str] # Disallowed Unicode characters
DUPLICATE_DASH_PATTERN: re.Pattern[str] # Duplicate dashes
NUMBERS_PATTERN: re.Pattern[str] # Comma-separated numbers
# Version and package information
__version__: str # Package version (e.g., "8.0.4")
__title__: str # Package title ("python-slugify")
__author__: str # Package author ("Val Neekman")
__author_email__: str # Author email ("info@neekware.com")
__description__: str # Package description
__url__: str # Package URL ("https://github.com/un33k/python-slugify")
__license__: str # License ("MIT")
__copyright__: str # Copyright notice
from slugify import __version__, __title__, __author__
print(f"{__title__} version {__version__} by {__author__}")
# Output: python-slugify version 8.0.4 by Val Neekman
Python Slugify is designed to be robust and handles various edge cases gracefully:
from slugify import slugify
# Handles various input types
result = slugify(123) # "123"
result = slugify(None) # ""
result = slugify("") # ""
# Graceful error handling for malformed HTML entities
result = slugify("&#invalid;") # Skips invalid entity, continues processing
text-unidecode>=1.3 (GPL & Perl Artistic license)
Unidecode>=1.1.1 (install with pip install python-slugify[unidecode])
The package automatically uses Unidecode if available, otherwise falls back to text-unidecode. Unidecode is considered more advanced for Unicode transliteration but has different licensing terms.