Python extension for computing string edit distances and similarities.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Functions for analyzing and manipulating edit operation sequences that transform one string into another. These functions provide detailed analysis of the specific changes needed and support conversion between different operation formats.
Find sequence of edit operations transforming one string to another, returning operations as triples (operation, source_pos, dest_pos).
def editops(*args):
"""
Find sequence of edit operations transforming one string to another.
Two calling patterns:
- editops(source_string, destination_string): Find operations
- editops(opcodes, source_length, destination_length): Convert from opcodes
Parameters:
- source_string: Source string or opcodes list for conversion
- destination_string: Destination string or source length for conversion
- destination_length (conversion mode only): Length of the destination string
Returns:
list: List of tuples (operation, source_pos, dest_pos) where:
- operation: 'delete', 'insert', or 'replace'
- source_pos: Position in source string
- dest_pos: Position in destination string
"""

Usage Examples:
import Levenshtein
# Find edit operations between strings
ops = Levenshtein.editops("spam", "park")
print(ops) # [('delete', 0, 0), ('insert', 3, 2), ('replace', 3, 3)]
# Convert from opcodes to editops
opcodes_list = [('delete', 0, 1, 0, 0), ('equal', 1, 3, 0, 2),
('insert', 3, 3, 2, 3), ('replace', 3, 4, 3, 4)]
editops_list = Levenshtein.editops(opcodes_list, len("spam"), len("park"))
print(editops_list) # Converted edit operations

Find sequence of edit operations in SequenceMatcher-compatible format, returning 5-tuples that include range information.
def opcodes(*args):
"""
Find sequence of edit operations in SequenceMatcher format.
Two calling patterns:
- opcodes(source_string, destination_string): Find operations
- opcodes(editops, source_length, destination_length): Convert from editops
Parameters:
- source_string: Source string or editops list for conversion
- destination_string: Destination string or source length for conversion
- destination_length (conversion mode only): Length of the destination string
Returns:
list: List of 5-tuples (operation, start1, end1, start2, end2) where:
- operation: 'delete', 'insert', 'replace', or 'equal'
- start1, end1: Range in source string
- start2, end2: Range in destination string
"""

Usage Examples:
import Levenshtein
# Find opcodes between strings
ops = Levenshtein.opcodes("spam", "park")
for op in ops:
print(op)
# Output:
# ('delete', 0, 1, 0, 0)
# ('equal', 1, 3, 0, 2)
# ('insert', 3, 3, 2, 3)
# ('replace', 3, 4, 3, 4)
# Convert from editops to opcodes
editops_list = [('delete', 0, 0), ('insert', 3, 2), ('replace', 3, 3)]
opcodes_list = Levenshtein.opcodes(editops_list, len("spam"), len("park"))
print(opcodes_list) # Converted opcodes

Find identical blocks in two strings from edit operations, compatible with SequenceMatcher's get_matching_blocks() output.
def matching_blocks(edit_operations, source_string, destination_string):
"""
Find identical blocks in two strings from edit operations.
Parameters:
- edit_operations: List of editops or opcodes
- source_string: Source string or its length as int
- destination_string: Destination string or its length as int
Returns:
list: List of triples (source_pos, dest_pos, length) representing matching blocks.
Always ends with a zero-length block for compatibility.
"""

Usage Examples:
import Levenshtein
# Get matching blocks from edit operations
a, b = "spam", "park"
ops = Levenshtein.editops(a, b)
blocks = Levenshtein.matching_blocks(ops, a, b)
print(blocks) # [(1, 0, 2), (4, 4, 0)]
# Works with string lengths too
blocks = Levenshtein.matching_blocks(ops, len(a), len(b))
print(blocks) # Same result
# Extract matching substrings
a, b = "dog kennels", "mattresses"
ops = Levenshtein.editops(a, b)
blocks = Levenshtein.matching_blocks(ops, a, b)
matches_a = ''.join([a[block[0]:block[0]+block[2]] for block in blocks])
matches_b = ''.join([b[block[1]:block[1]+block[2]] for block in blocks])
print(f"Matching parts: '{matches_a}' == '{matches_b}'") # 'ees' == 'ees'

Apply a sequence of edit operations to transform a string, supporting both complete and partial operation sequences.
def apply_edit(edit_operations, source_string, destination_string):
"""
Apply sequence of edit operations to transform a string.
Parameters:
- edit_operations: List of editops or opcodes to apply
- source_string: Source string to transform
- destination_string: Destination string (supplies the characters used by insert and replace operations)
Returns:
str: Transformed string after applying operations
"""

Usage Examples:
import Levenshtein
# Apply complete edit sequence
source = "man"
dest = "scotsman"
ops = Levenshtein.editops(source, dest)
result = Levenshtein.apply_edit(ops, source, dest)
print(f"'{source}' -> '{result}'") # 'man' -> 'scotsman'
# Apply partial edit sequence
partial_ops = ops[:3] # First 3 operations only
partial_result = Levenshtein.apply_edit(partial_ops, source, dest)
print(f"Partial: '{source}' -> '{partial_result}'") # 'man' -> 'scoman'
# Works with opcodes too
opcodes_list = Levenshtein.opcodes(source, dest)
result = Levenshtein.apply_edit(opcodes_list, source, dest)
print(f"With opcodes: '{source}' -> '{result}'") # 'man' -> 'scotsman'

Subtract an edit subsequence from a sequence, creating operations that complete the transformation after a partial application.
def subtract_edit(edit_operations, subsequence):
"""
Subtract an edit subsequence from an operation sequence.
Parameters:
- edit_operations: Complete list of edit operations
- subsequence: Ordered subset of operations to subtract
Returns:
list: Remaining operations after subtracting the subsequence
Note: Only works with editops (triples), not opcodes
"""

Usage Examples:
import Levenshtein
# Get complete edit sequence
source = "man"
dest = "scotsman"
complete_ops = Levenshtein.editops(source, dest)
print("Complete ops:", complete_ops)
# Apply partial operations
partial_ops = complete_ops[:3]
intermediate = Levenshtein.apply_edit(partial_ops, source, dest)
print(f"After partial: '{intermediate}'")
# Calculate remaining operations
remaining_ops = Levenshtein.subtract_edit(complete_ops, partial_ops)
print("Remaining ops:", remaining_ops)
# Apply remaining operations
final = Levenshtein.apply_edit(remaining_ops, intermediate, dest)
print(f"Final result: '{final}'") # Should equal dest

Invert the sense of edit operations, swapping source and destination to reverse the transformation direction.
def inverse(edit_operations):
"""
Invert the sense of edit operations.
Returns operations that transform the destination string to the source string.
Works with both editops and opcodes.
Parameters:
- edit_operations: List of edit operations to invert
Returns:
list: Inverted edit operations
"""

Usage Examples:
import Levenshtein
# Get edit operations
forward_ops = Levenshtein.editops("spam", "park")
print("Forward:", forward_ops)
# [('delete', 0, 0), ('insert', 3, 2), ('replace', 3, 3)]
# Invert operations
reverse_ops = Levenshtein.inverse(forward_ops)
print("Reverse:", reverse_ops)
# [('insert', 0, 0), ('delete', 2, 3), ('replace', 3, 3)]
# Verify inversion works
result = Levenshtein.apply_edit(reverse_ops, "park", "spam")
print(f"Reverse transform: 'park' -> '{result}'") # 'park' -> 'spam'
# Works with opcodes too
forward_opcodes = Levenshtein.opcodes("spam", "park")
reverse_opcodes = Levenshtein.inverse(forward_opcodes)
print("Reversed opcodes:", reverse_opcodes)

import Levenshtein
def analyze_transformation(source, dest):
    """Print and return a detailed analysis of transforming *source* into *dest*.

    Computes the edit operations, the opcodes and the matching blocks for the
    two strings, prints each of them followed by a count of every edit
    operation type, and returns the three sequences.

    Parameters:
    - source: Source string
    - dest: Destination string

    Returns:
    tuple: (editops_list, opcodes_list, blocks)
    """
    from collections import Counter

    # Get different operation formats
    editops_list = Levenshtein.editops(source, dest)
    opcodes_list = Levenshtein.opcodes(source, dest)
    blocks = Levenshtein.matching_blocks(editops_list, source, dest)

    print(f"Transform '{source}' -> '{dest}'")
    print(f"Edit operations: {editops_list}")
    print(f"Opcodes: {opcodes_list}")
    print(f"Matching blocks: {blocks}")

    # Count operation types. Converting back to a plain dict keeps the
    # printed form identical to a hand-built dict (first-seen key order).
    op_counts = dict(Counter(op for op, _, _ in editops_list))
    print(f"Operation counts: {op_counts}")

    return editops_list, opcodes_list, blocks
# Example
analyze_transformation("kitten", "sitting")

import Levenshtein
def step_by_step_transform(source, dest):
    """Show each step of the transformation from *source* to *dest*.

    Prints the starting string, then the intermediate string obtained after
    each successive edit operation, and finally the resulting string.

    Parameters:
    - source: Source string
    - dest: Destination string
    """
    ops = Levenshtein.editops(source, dest)
    current = source
    print(f"Start: '{current}'")

    for step, (op, _src_pos, _dest_pos) in enumerate(ops, 1):
        # Edit operation positions refer to the original source string, so
        # each step re-applies the first `step` operations to `source`
        # rather than applying a single operation to the previous result.
        current = Levenshtein.apply_edit(ops[:step], source, dest)
        print(f"Step {step} ({op}): '{current}'")

    print(f"Final: '{current}'")
# Example
step_by_step_transform("cat", "dog")

import Levenshtein
def convert_operations(source, dest):
    """Round-trip the edit operations for *source* -> *dest* through opcodes.

    Prints the editops, their opcodes form, the editops obtained by
    converting the opcodes back, and whether the round trip reproduced the
    original editops.
    """
    src_len, dst_len = len(source), len(dest)

    # Editops are the starting representation.
    original = Levenshtein.editops(source, dest)
    print("Editops:", original)

    # Editops -> opcodes needs both string lengths.
    as_opcodes = Levenshtein.opcodes(original, src_len, dst_len)
    print("Opcodes:", as_opcodes)

    # Opcodes -> editops, again using both lengths.
    round_tripped = Levenshtein.editops(as_opcodes, src_len, dst_len)
    print("Converted back:", round_tripped)

    # Report whether the round trip was lossless.
    print("Equivalent:", original == round_tripped)
# Example
convert_operations("hello", "world")

import Levenshtein
def partial_transformation_demo(source, dest, steps=None):
    """Demonstrate applying a transformation in two stages.

    The first *steps* edit operations are applied to *source*; the
    operations left after subtracting them are then applied to the
    intermediate string. When *steps* is None, half of the operations
    (rounded down) are used for the first stage.

    Parameters:
    - source: Source string
    - dest: Destination string
    - steps: Number of leading operations to apply first (default: half)
    """
    all_ops = Levenshtein.editops(source, dest)
    steps = len(all_ops) // 2 if steps is None else steps

    # Stage one: apply only the leading operations.
    leading = all_ops[:steps]
    intermediate = Levenshtein.apply_edit(leading, source, dest)

    # Stage two: apply whatever is left, starting from the intermediate.
    trailing = Levenshtein.subtract_edit(all_ops, leading)
    final = Levenshtein.apply_edit(trailing, intermediate, dest)

    print(f"Source: '{source}'")
    print(f"After {steps} operations: '{intermediate}'")
    print(f"Final: '{final}'")
    print(f"Matches destination: {final == dest}")
# Example
partial_transformation_demo("programming", "algorithm", 3)

Install with Tessl CLI
npx tessl i tessl/pypi-levenshtein