tessl/pypi-levenshtein

Python extension for computing string edit distances and similarities.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Edit Operations

Name: tessl/pypi-levenshtein
Author: tessl

Functions for analyzing and manipulating edit operation sequences that transform one string into another. These functions provide detailed analysis of the specific changes needed and support conversion between different operation formats.

Capabilities

Edit Operations (Triples)

Find the sequence of edit operations that transforms one string into another, returned as a list of triples (operation, source_pos, dest_pos).

def editops(*args):
    """
    Find the sequence of edit operations transforming one string into another.

    Two calling patterns, distinguished by the positional arguments passed:
    - editops(source_string, destination_string): Find operations
    - editops(opcodes, source_length, destination_length): Convert from opcodes

    Parameters (find mode):
    - source_string: String to transform
    - destination_string: String to transform into

    Parameters (conversion mode):
    - opcodes: List of opcodes (5-tuples) to convert
    - source_length: Length of the source string
    - destination_length: Length of the destination string

    Returns:
    list: List of tuples (operation, source_pos, dest_pos) where:
        - operation: 'delete', 'insert', or 'replace'
        - source_pos: Position in source string
        - dest_pos: Position in destination string
    """

Usage Examples:

import Levenshtein

# Compute the triples that turn "spam" into "park"
source, target = "spam", "park"
ops = Levenshtein.editops(source, target)
print(ops)  # [('delete', 0, 0), ('insert', 3, 2), ('replace', 3, 3)]

# Hand editops() a list of opcodes (5-tuples) plus both string lengths
# to convert them into triples
opcodes_list = [
    ('delete', 0, 1, 0, 0),
    ('equal', 1, 3, 0, 2),
    ('insert', 3, 3, 2, 3),
    ('replace', 3, 4, 3, 4),
]
editops_list = Levenshtein.editops(opcodes_list, len(source), len(target))
print(editops_list)  # Converted edit operations

Opcodes (5-tuples)

Find the sequence of edit operations in a SequenceMatcher-compatible format, returned as 5-tuples that include range information.

def opcodes(*args):
    """
    Find the sequence of edit operations in SequenceMatcher format.

    Two calling patterns, distinguished by the positional arguments passed:
    - opcodes(source_string, destination_string): Find operations
    - opcodes(editops, source_length, destination_length): Convert from editops

    Parameters (find mode):
    - source_string: String to transform
    - destination_string: String to transform into

    Parameters (conversion mode):
    - editops: List of editops (triples) to convert
    - source_length: Length of the source string
    - destination_length: Length of the destination string

    Returns:
    list: List of 5-tuples (operation, start1, end1, start2, end2) where:
        - operation: 'delete', 'insert', 'replace', or 'equal'
        - start1, end1: Range in source string
        - start2, end2: Range in destination string
    """

Usage Examples:

import Levenshtein

# Compute SequenceMatcher-style 5-tuples for "spam" -> "park"
source, target = "spam", "park"
ops = Levenshtein.opcodes(source, target)
for op in ops:
    print(op)
# Output:
# ('delete', 0, 1, 0, 0)
# ('equal', 1, 3, 0, 2)
# ('insert', 3, 3, 2, 3)
# ('replace', 3, 4, 3, 4)

# Hand opcodes() a list of editops (triples) plus both string lengths
# to convert them into 5-tuples
editops_list = [
    ('delete', 0, 0),
    ('insert', 3, 2),
    ('replace', 3, 3),
]
opcodes_list = Levenshtein.opcodes(editops_list, len(source), len(target))
print(opcodes_list)  # Converted opcodes

Matching Blocks

Find identical blocks in two strings from edit operations, compatible with SequenceMatcher's get_matching_blocks() output.

def matching_blocks(edit_operations, source_string, destination_string):
    """
    Find identical blocks in two strings from edit operations.

    The output is compatible with SequenceMatcher's get_matching_blocks().

    Parameters:
    - edit_operations: List of editops (triples) or opcodes (5-tuples)
    - source_string: Source string, or just its length as an int
    - destination_string: Destination string, or just its length as an int

    Returns:
    list: List of triples (source_pos, dest_pos, length) representing matching blocks.
          Always ends with a zero-length block for compatibility,
          e.g. (4, 4, 0) for "spam" -> "park".
    """

Usage Examples:

import Levenshtein

# Derive the matching blocks from the editops of "spam" -> "park"
a = "spam"
b = "park"
ops = Levenshtein.editops(a, b)
blocks = Levenshtein.matching_blocks(ops, a, b)
print(blocks)  # [(1, 0, 2), (4, 4, 0)]

# The lengths alone are enough; the strings themselves are optional
blocks = Levenshtein.matching_blocks(ops, len(a), len(b))
print(blocks)  # Same result

# Use the blocks to slice the shared text out of each string
a = "dog kennels"
b = "mattresses"
ops = Levenshtein.editops(a, b)
blocks = Levenshtein.matching_blocks(ops, a, b)
pieces_a = []
pieces_b = []
for block in blocks:
    src_at, dst_at, size = block[0], block[1], block[2]
    pieces_a.append(a[src_at:src_at + size])
    pieces_b.append(b[dst_at:dst_at + size])
matches_a = ''.join(pieces_a)
matches_b = ''.join(pieces_b)
print(f"Matching parts: '{matches_a}' == '{matches_b}'")  # 'ees' == 'ees'

Apply Edit Operations

Apply a sequence of edit operations to transform a string, supporting both complete and partial operation sequences.

def apply_edit(edit_operations, source_string, destination_string):
    """
    Apply a sequence of edit operations to transform a string.

    Parameters:
    - edit_operations: List of editops or opcodes to apply. A leading part of
      an editops list may be passed to obtain an intermediate string (e.g. the
      first three operations of 'man' -> 'scotsman' yield 'scoman').
    - source_string: Source string to transform
    - destination_string: Destination string the operations were computed
      against. The operations carry only positions, so this is where inserted
      and replacement characters are taken from.

    Returns:
    str: Transformed string after applying operations
    """

Usage Examples:

import Levenshtein

# Applying every operation reproduces the destination string
source, dest = "man", "scotsman"
ops = Levenshtein.editops(source, dest)
result = Levenshtein.apply_edit(ops, source, dest)
print(f"'{source}' -> '{result}'")  # 'man' -> 'scotsman'

# Applying only the first three operations stops at an intermediate string
partial_ops = ops[:3]
partial_result = Levenshtein.apply_edit(partial_ops, source, dest)
print(f"Partial: '{source}' -> '{partial_result}'")  # 'man' -> 'scoman'

# Opcodes (5-tuples) are accepted in place of editops
opcodes_list = Levenshtein.opcodes(source, dest)
result = Levenshtein.apply_edit(opcodes_list, source, dest)
print(f"With opcodes: '{source}' -> '{result}'")  # 'man' -> 'scotsman'

Subtract Edit Operations

Subtract an edit subsequence from a sequence, creating operations that complete the transformation after a partial application.

def subtract_edit(edit_operations, subsequence):
    """
    Subtract an edit subsequence from an operation sequence.

    The result completes a transformation after `subsequence` has already
    been applied: applying it to that intermediate string yields the
    destination string.

    Parameters:
    - edit_operations: Complete list of edit operations
    - subsequence: Ordered subset of operations to subtract

    Returns:
    list: Remaining operations after subtracting the subsequence

    Note: Only works with editops (triples), not opcodes
    """

Usage Examples:

import Levenshtein

# Full list of operations for "man" -> "scotsman"
source, dest = "man", "scotsman"
complete_ops = Levenshtein.editops(source, dest)
print("Complete ops:", complete_ops)

# Stop after the first three operations
partial_ops = complete_ops[:3]
intermediate = Levenshtein.apply_edit(partial_ops, source, dest)
print(f"After partial: '{intermediate}'")

# Whatever is left once the applied operations are subtracted
remaining_ops = Levenshtein.subtract_edit(complete_ops, partial_ops)
print("Remaining ops:", remaining_ops)

# Finish the job starting from the intermediate string
final = Levenshtein.apply_edit(remaining_ops, intermediate, dest)
print(f"Final result: '{final}'")  # Should equal dest

Inverse Edit Operations

Invert the sense of edit operations, swapping source and destination to reverse the transformation direction.

def inverse(edit_operations):
    """
    Invert the sense of edit operations.

    Returns operations that transform the destination string to the source string.
    Works with both editops and opcodes.

    Example (editops for "spam" -> "park"):
        [('delete', 0, 0), ('insert', 3, 2), ('replace', 3, 3)]
    becomes
        [('insert', 0, 0), ('delete', 2, 3), ('replace', 3, 3)]

    Parameters:
    - edit_operations: List of edit operations to invert

    Returns:
    list: Inverted edit operations
    """

Usage Examples:

import Levenshtein

# Operations for the forward direction, "spam" -> "park"
source, target = "spam", "park"
forward_ops = Levenshtein.editops(source, target)
print("Forward:", forward_ops)
# [('delete', 0, 0), ('insert', 3, 2), ('replace', 3, 3)]

# Flip them so they describe "park" -> "spam"
reverse_ops = Levenshtein.inverse(forward_ops)
print("Reverse:", reverse_ops)
# [('insert', 0, 0), ('delete', 2, 3), ('replace', 3, 3)]

# Applying the flipped operations to the destination recovers the source
result = Levenshtein.apply_edit(reverse_ops, target, source)
print(f"Reverse transform: 'park' -> '{result}'")  # 'park' -> 'spam'

# Opcodes (5-tuples) can be inverted the same way
forward_opcodes = Levenshtein.opcodes(source, target)
reverse_opcodes = Levenshtein.inverse(forward_opcodes)
print("Reversed opcodes:", reverse_opcodes)

Advanced Usage Patterns

Analyzing String Transformations

import Levenshtein

def analyze_transformation(source, dest):
    """Print and return every view of the `source` -> `dest` transformation.

    Prints the editops, the opcodes, the matching blocks and a per-operation
    tally, then returns the tuple (editops, opcodes, matching blocks).
    """
    triples = Levenshtein.editops(source, dest)
    ranges = Levenshtein.opcodes(source, dest)
    shared = Levenshtein.matching_blocks(triples, source, dest)

    report = (
        f"Transform '{source}' -> '{dest}'",
        f"Edit operations: {triples}",
        f"Opcodes: {ranges}",
        f"Matching blocks: {shared}",
    )
    for line in report:
        print(line)

    # Tally how often each kind of operation occurs, in first-seen order
    tally = {}
    for kind, _, _ in triples:
        if kind in tally:
            tally[kind] += 1
        else:
            tally[kind] = 1
    print(f"Operation counts: {tally}")

    return triples, ranges, shared

# Example
analyze_transformation("kitten", "sitting")

Step-by-Step Transformation

import Levenshtein

def step_by_step_transform(source, dest):
    """Print the intermediate string after each successive edit operation.

    Parameters:
    - source: String to transform
    - dest: String to transform into

    Prints the starting string, one line per edit operation showing the
    string after the first N operations have been applied, and the final
    string. Returns nothing.
    """
    ops = Levenshtein.editops(source, dest)
    current = source

    print(f"Start: '{current}'")

    for step, (op, _, _) in enumerate(ops, 1):
        # Positions in each triple refer to the original source and dest, so
        # an operation cannot be applied on its own to the already-modified
        # `current`. Re-apply the first `step` operations to `source` instead.
        current = Levenshtein.apply_edit(ops[:step], source, dest)
        print(f"Step {step} ({op}): '{current}'")

    print(f"Final: '{current}'")

# Example
step_by_step_transform("cat", "dog")

Operation Format Conversion

import Levenshtein

def convert_operations(source, dest):
    """Round-trip editops -> opcodes -> editops and report whether they match.

    Prints the original editops, the opcodes derived from them, the editops
    derived back from those opcodes, and whether the round trip reproduced
    the original list. Returns nothing.
    """
    # Triples computed directly from the two strings
    editops_list = Levenshtein.editops(source, dest)
    print("Editops:", editops_list)

    # Both conversions need the string lengths, not the strings
    src_len, dst_len = len(source), len(dest)

    # Triples -> 5-tuples
    opcodes_list = Levenshtein.opcodes(editops_list, src_len, dst_len)
    print("Opcodes:", opcodes_list)

    # 5-tuples -> triples again
    editops_converted = Levenshtein.editops(opcodes_list, src_len, dst_len)
    print("Converted back:", editops_converted)

    # The round trip should give back what we started with
    round_trip_ok = editops_list == editops_converted
    print("Equivalent:", round_trip_ok)

# Example
convert_operations("hello", "world")

Partial Transformations

import Levenshtein

def partial_transformation_demo(source, dest, steps=None):
    """Apply the first `steps` editops, then finish with the subtracted rest.

    When `steps` is None, half of the operations (rounded down) are applied
    first. Prints the source, the intermediate string, the final string and
    whether the final string equals `dest`. Returns nothing.
    """
    ops = Levenshtein.editops(source, dest)

    # Default split point: halfway through the operation list
    steps = len(ops) // 2 if steps is None else steps

    # First leg: the leading `steps` operations
    head = ops[:steps]
    intermediate = Levenshtein.apply_edit(head, source, dest)

    # Second leg: everything not yet applied, run against the intermediate
    tail = Levenshtein.subtract_edit(ops, head)
    final = Levenshtein.apply_edit(tail, intermediate, dest)

    summary = (
        f"Source: '{source}'",
        f"After {steps} operations: '{intermediate}'",
        f"Final: '{final}'",
        f"Matches destination: {final == dest}",
    )
    for line in summary:
        print(line)

# Example
partial_transformation_demo("programming", "algorithm", 3)

Install with Tessl CLI