Alternative regular expression module providing enhanced pattern matching, fuzzy matching, and advanced Unicode support as a replacement for Python's re module.
—
Advanced string replacement capabilities that extend beyond the standard re module with enhanced parameters for position control, concurrent execution, timeout handling, and format-based replacements. These functions provide powerful text transformation capabilities for complex pattern-based string manipulation.
Replace pattern occurrences in a string with a replacement string or callable function, providing precise control over the number of replacements and search boundaries.
def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
concurrent=None, timeout=None, ignore_unused=False, **kwargs):
"""
Return the string obtained by replacing non-overlapping occurrences of pattern with replacement.
Args:
pattern (str): Regular expression pattern to find
repl (str or callable): Replacement string or function
string (str): String to perform substitutions on
count (int, optional): Maximum number of replacements (0 = all)
flags (int, optional): Regex flags to modify matching behavior
pos (int, optional): Start position for searching (default: 0)
endpos (int, optional): End position for searching (default: len(string))
concurrent (bool, optional): Release GIL during matching for multithreading
timeout (float, optional): Timeout in seconds for matching operation
ignore_unused (bool, optional): Ignore unused keyword arguments
**kwargs: Additional pattern compilation arguments
Returns:
str: String with replacements made
"""
Usage Examples:
import regex
# Basic substitution
result = regex.sub(r'\d+', 'X', 'Replace 123 and 456 with X')
print(result) # 'Replace X and X with X'
# Limited number of replacements
result = regex.sub(r'\d+', 'NUM', 'Values: 1, 2, 3, 4', count=2)
print(result) # 'Values: NUM, NUM, 3, 4'
# Using replacement function
def capitalize_match(match):
return match.group().upper()
result = regex.sub(r'\b\w+\b', capitalize_match, 'hello world')
print(result) # 'HELLO WORLD'
# Position-bounded substitution
result = regex.sub(r'\d', 'X', '12abc34def56', pos=2, endpos=8)
print(result) # '12abcXXdef56' (only digits between pos=2 and endpos=8 are replaced)
# Using backreferences
result = regex.sub(r'(\w+) (\w+)', r'\2, \1', 'John Doe')
print(result) # 'Doe, John'
# Named group backreferences
result = regex.sub(r'(?P<first>\w+) (?P<last>\w+)', r'\g<last>, \g<first>', 'Jane Smith')
print(result) # 'Smith, Jane'
Replace pattern occurrences using Python's format string syntax, providing more flexible and readable replacement patterns.
def subf(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
concurrent=None, timeout=None, ignore_unused=False, **kwargs):
"""
Return the string obtained by replacing pattern occurrences using format string.
Args:
pattern (str): Regular expression pattern to find
format (str or callable): Format string or function using Python format syntax
string (str): String to perform substitutions on
count (int, optional): Maximum number of replacements (0 = all)
flags (int, optional): Regex flags to modify matching behavior
pos (int, optional): Start position for searching (default: 0)
endpos (int, optional): End position for searching (default: len(string))
concurrent (bool, optional): Release GIL during matching for multithreading
timeout (float, optional): Timeout in seconds for matching operation
ignore_unused (bool, optional): Ignore unused keyword arguments
**kwargs: Additional pattern compilation arguments
Returns:
str: String with format-based replacements made
"""
Usage Examples:
import regex
# Format string with positional arguments ({0} is the whole match; capture groups start at {1})
result = regex.subf(r'(\w+) (\w+)', '{2}, {1}', 'John Doe')
print(result) # 'Doe, John'
# Format string with named groups
pattern = r'(?P<name>\w+): (?P<value>\d+)'
format_str = '{name} = {value}'
result = regex.subf(pattern, format_str, 'width: 100, height: 200')
print(result) # 'width = 100, height = 200'
# Format function for complex transformations
def format_currency(match):
amount = float(match.group('amount'))
return f'${amount:.2f}'
pattern = r'(?P<amount>\d+\.\d+)'
result = regex.subf(pattern, format_currency, 'Price: 19.9, Tax: 2.5')
print(result) # 'Price: $19.90, Tax: $2.50'
Perform substitutions and return both the modified string and the number of substitutions made, useful for tracking replacement operations.
def subn(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
concurrent=None, timeout=None, ignore_unused=False, **kwargs):
"""
Return a 2-tuple containing (new_string, number_of_substitutions_made).
Args:
pattern (str): Regular expression pattern to find
repl (str or callable): Replacement string or function
string (str): String to perform substitutions on
count (int, optional): Maximum number of replacements (0 = all)
flags (int, optional): Regex flags to modify matching behavior
pos (int, optional): Start position for searching (default: 0)
endpos (int, optional): End position for searching (default: len(string))
concurrent (bool, optional): Release GIL during matching for multithreading
timeout (float, optional): Timeout in seconds for matching operation
ignore_unused (bool, optional): Ignore unused keyword arguments
**kwargs: Additional pattern compilation arguments
Returns:
tuple: (modified_string, substitution_count)
"""
Usage Examples:
import regex
# Basic substitution with count
result, count = regex.subn(r'\d+', 'NUM', 'Replace 123 and 456')
print(f"Result: '{result}', Replacements: {count}")
# Result: 'Replace NUM and NUM', Replacements: 2
# Limited replacements with count
result, count = regex.subn(r'\w+', 'WORD', 'one two three four', count=2)
print(f"Result: '{result}', Replacements: {count}")
# Result: 'WORD WORD three four', Replacements: 2
# Check if any replacements were made
original = 'No numbers here'
result, count = regex.subn(r'\d+', 'NUM', original)
if count == 0:
print("No substitutions were made")
else:
print(f"Made {count} substitutions: {result}")
Combine format-based replacement with substitution counting for complete replacement operation tracking.
def subfn(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
concurrent=None, timeout=None, ignore_unused=False, **kwargs):
"""
Same as subf but also return the number of substitutions made.
Args:
pattern (str): Regular expression pattern to find
format (str or callable): Format string or function using Python format syntax
string (str): String to perform substitutions on
count (int, optional): Maximum number of replacements (0 = all)
flags (int, optional): Regex flags to modify matching behavior
pos (int, optional): Start position for searching (default: 0)
endpos (int, optional): End position for searching (default: len(string))
concurrent (bool, optional): Release GIL during matching for multithreading
timeout (float, optional): Timeout in seconds for matching operation
ignore_unused (bool, optional): Ignore unused keyword arguments
**kwargs: Additional pattern compilation arguments
Returns:
tuple: (formatted_string, substitution_count)
"""
Usage Examples:
import regex
# Format-based substitution with count
pattern = r'(?P<name>\w+): (?P<value>\d+)'
format_str = '{name}={value}'
result, count = regex.subfn(pattern, format_str, 'width: 100, height: 200')
print(f"Result: '{result}', Replacements: {count}")
# Result: 'width=100, height=200', Replacements: 2
# Track format replacements
def format_phone(match):
area = match.group(1)
number = match.group(2)
return f"({area}) {number[:3]}-{number[3:]}"
pattern = r'(\d{3})(\d{7})'
text = 'Call 5551234567 or 8009876543'
result, count = regex.subfn(pattern, format_phone, text)
print(f"Formatted {count} phone numbers: {result}")
# Formatted 2 phone numbers: Call (555) 123-4567 or (800) 987-6543
Replacement functions receive a Match object and can perform complex transformations:
def smart_replace(match):
value = match.group()
if value.isdigit():
return str(int(value) * 2) # Double numbers
else:
return value.upper() # Uppercase text
result = regex.sub(r'\w+', smart_replace, 'test 123 hello 456')
print(result) # 'TEST 246 HELLO 912'
Use Match object properties for conditional replacements:
def conditional_replace(match):
word = match.group()
if len(word) > 4:
return word.upper()
else:
return word.lower()
result = regex.sub(r'\b\w+\b', conditional_replace, 'Hello World Test')
print(result) # 'HELLO WORLD test' ('Hello' and 'World' have more than 4 characters; 'Test' does not)
Access match position information in replacement functions:
def position_replace(match):
start = match.start()
text = match.group()
return f"{text}@{start}"
result = regex.sub(r'\w+', position_replace, 'one two three')
print(result) # 'one@0 two@4 three@8'
Use the REVERSE flag for right-to-left pattern matching:
# Replace from right to left
result = regex.sub(r'\d+', 'X', '123abc456def789', flags=regex.REVERSE, count=2)
print(result) # '123abcXdefX' (the two rightmost matches are replaced)
Combine fuzzy matching with substitutions:
# Replace approximate matches
pattern = r'(?e)(hello){e<=1}' # Allow 1 error
result = regex.sub(pattern, 'hi', 'helo world, hallo there')
print(result) # 'hi world, hi there'
Enable concurrent execution for performance with large texts:
# Process large text with concurrent execution
large_text = "lorem ipsum " * 10000 # Large text containing words for \w+ to match
result = regex.sub(r'\w+', 'WORD', large_text, concurrent=True)
# Set timeout for potentially slow operations
try:
result = regex.sub(complex_pattern, replacement, text, timeout=5.0)
except TimeoutError as e:
print(f"Substitution timed out: {e}")
Install with Tessl CLI
npx tessl i tessl/pypi-regex