Python compatibility wrapper for computing string edit distances and similarities using fast Levenshtein algorithms.
npx @tessl/cli install tessl/pypi-python-levenshtein@0.27.0
A Python compatibility wrapper providing fast computation of string edit distances and similarities through the Levenshtein C extension library. This package enables legacy code to continue working with the old python-Levenshtein package name while accessing the modern Levenshtein package functionality.
pip install python-Levenshtein
import Levenshtein
Import specific functions:
from Levenshtein import distance, ratio, editops
import Levenshtein
# Calculate edit distance between strings
distance = Levenshtein.distance("hello", "world")
print(distance) # 4
# Calculate similarity ratio (0.0 to 1.0)
similarity = Levenshtein.ratio("hello", "help")
print(similarity) # ~0.667 (normalized Indel similarity: 1 - 3/9)
# Get edit operations needed to transform one string to another
ops = Levenshtein.editops("kitten", "sitting")
print(ops) # [('replace', 0, 0), ('replace', 4, 4), ('insert', 6, 6)]
The python-Levenshtein package serves as a compatibility wrapper that maintains backward compatibility for legacy code while providing access to modern string similarity functionality:
- Depends directly on Levenshtein==0.27.1
- Allows legacy code that uses the python-Levenshtein package name to continue working unchanged
- All functionality is provided by the underlying Levenshtein package
- Functions are accessed via import Levenshtein regardless of whether installed via python-Levenshtein or Levenshtein
This design ensures seamless migration paths while leveraging the optimized C implementation for maximum performance in string similarity computations.
Basic string distance calculations using various algorithms.
def distance(s1: str, s2: str) -> int:
"""
Compute the Levenshtein edit distance between two strings.
Args:
s1 (str): First string
s2 (str): Second string
Returns:
int: Edit distance (number of operations needed to transform s1 to s2)
"""
def ratio(s1: str, s2: str) -> float:
"""
Calculate the normalized Indel similarity of two strings (a substitution counts as one deletion plus one insertion).
Args:
s1 (str): First string
s2 (str): Second string
Returns:
float: Similarity ratio between 0.0 and 1.0
"""
def hamming(s1: str, s2: str) -> int:
"""
Compute the Hamming distance (number of positions at which the strings differ).
Args:
s1 (str): First string
s2 (str): Second string (if its length differs from s1, the strings are padded by default)
Returns:
int: Hamming distance
Raises:
ValueError: If strings have different lengths and padding is disabled (pad=False)
"""
Specialized string similarity algorithms for different use cases.
def jaro(s1: str, s2: str) -> float:
"""
Compute Jaro string similarity metric.
Args:
s1 (str): First string
s2 (str): Second string
Returns:
float: Jaro similarity score between 0.0 and 1.0
"""
def jaro_winkler(s1: str, s2: str) -> float:
"""
Compute Jaro-Winkler string similarity metric.
Args:
s1 (str): First string
s2 (str): Second string
Returns:
float: Jaro-Winkler similarity score between 0.0 and 1.0
"""
Functions for finding representative strings from collections.
def median(strings: list[str]) -> str:
"""
Find approximate median string from a sequence of strings.
Args:
strings (list[str]): List of strings to find median for
Returns:
str: Approximate median string
"""
def median_improve(median: str, strings: list[str]) -> str:
"""
Improve an approximate median string by perturbations.
Args:
median (str): Initial (approximate) median string to improve
strings (list[str]): List of strings the median should represent
Returns:
str: Improved median string
"""
def quickmedian(strings: list[str]) -> str:
"""
Fast median string computation.
Args:
strings (list[str]): List of strings to find median for
Returns:
str: Quick median string
"""
def setmedian(strings: list[str]) -> str:
"""
Compute median for set of strings.
Args:
strings (list[str]): List of strings treated as a set
Returns:
str: Set median string
"""
Functions for comparing collections of strings.
def seqratio(seq1: list[str], seq2: list[str]) -> float:
"""
Similarity ratio for string sequences.
Args:
seq1 (list[str]): First sequence of strings
seq2 (list[str]): Second sequence of strings
Returns:
float: Sequence similarity ratio between 0.0 and 1.0
"""
def setratio(set1: list[str], set2: list[str]) -> float:
"""
Similarity ratio for string sets.
Args:
set1 (list[str]): First set of strings
set2 (list[str]): Second set of strings
Returns:
float: Set similarity ratio between 0.0 and 1.0
"""
Functions for analyzing and manipulating the edit operations between strings.
def editops(s1: str, s2: str) -> list[tuple[str, int, int]]:
"""
Return list of edit operations to transform one string to another.
Args:
s1 (str): Source string
s2 (str): Target string
Returns:
list[tuple[str, int, int]]: List of (operation, pos1, pos2) tuples
where operation is 'delete', 'insert', or 'replace'
"""
def opcodes(s1: str, s2: str) -> list[tuple[str, int, int, int, int]]:
"""
Return list of opcodes (similar to difflib.SequenceMatcher.get_opcodes()).
Args:
s1 (str): Source string
s2 (str): Target string
Returns:
list[tuple[str, int, int, int, int]]: List of (tag, i1, i2, j1, j2) tuples
"""
def inverse(ops: list[tuple[str, int, int]]) -> list[tuple[str, int, int]]:
"""
Invert edit operations.
Args:
ops (list[tuple[str, int, int]]): Edit operations to invert
Returns:
list[tuple[str, int, int]]: Inverted edit operations
"""
def apply_edit(ops: list[tuple[str, int, int]], s1: str, s2: str) -> str:
"""
Apply edit operations to transform string.
Args:
ops (list[tuple[str, int, int]]): Edit operations to apply
s1 (str): Source string
s2 (str): Target string
Returns:
str: Transformed string
"""
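def matching_blocks(ops: list[tuple[str, int, int]], s1: str, s2: str) -> list[tuple[int, int, int]]:
"""
Return matching blocks between strings, derived from their edit operations.
Args:
ops (list[tuple[str, int, int]]): Edit operations transforming s1 into s2, e.g. from editops(s1, s2)
s1 (str): First string
s2 (str): Second string
Returns:
list[tuple[int, int, int]]: List of (pos1, pos2, length) tuples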
"""
def subtract_edit(ops1: list[tuple[str, int, int]], ops2: list[tuple[str, int, int]]) -> list[tuple[str, int, int]]:
"""
Subtract edit operations.
Args:
ops1 (list[tuple[str, int, int]]): First set of edit operations
ops2 (list[tuple[str, int, int]]): Second set of edit operations
Returns:
list[tuple[str, int, int]]: Resulting edit operations
"""
import Levenshtein
# Find the most similar string from a list
target = "hello"
candidates = ["hallo", "hullo", "help", "world"]
best_match = min(candidates, key=lambda x: Levenshtein.distance(target, x))
print(f"Best match: {best_match}") # "hallo"
# Get similarity scores
similarities = [(candidate, Levenshtein.ratio(target, candidate))
for candidate in candidates]
similarities.sort(key=lambda x: x[1], reverse=True)
print("Similarity scores:", similarities)
import Levenshtein
# Analyze how to transform one string to another
source = "kitten"
target = "sitting"
# Get edit operations
ops = Levenshtein.editops(source, target)
print("Edit operations:", ops)
# Get detailed opcodes
codes = Levenshtein.opcodes(source, target)
print("Opcodes:", codes)
# Find matching blocks
blocks = Levenshtein.matching_blocks(ops, source, target)
print("Matching blocks:", blocks)
import Levenshtein
# Find median string from a collection
words = ["cat", "bat", "rat", "hat", "mat"]
median_word = Levenshtein.median(words)
print(f"Median word: {median_word}")
# Compare sequences of strings
seq1 = ["hello", "world"]
seq2 = ["hello", "world", "python"]
similarity = Levenshtein.seqratio(seq1, seq2)
print(f"Sequence similarity: {similarity}")