Fuzzy string matching library using Levenshtein Distance calculations for approximate string comparison and search
—
Core fuzzy string comparison functions that implement various algorithms for measuring string similarity. All functions return an integer score from 0 to 100, where 100 indicates identical strings and 0 indicates no similarity.
Standard string similarity using sequence matching algorithms.
def ratio(s1: str, s2: str) -> int:
"""
Calculate similarity ratio between two strings.
Returns:
int: Similarity score 0-100
"""

Usage Example:
from fuzzywuzzy import fuzz
score = fuzz.ratio("this is a test", "this is a test!")
print(score) # 97
score = fuzz.ratio("fuzzy wuzzy", "wuzzy fuzzy")
print(score) # 82

Finds the similarity of the most similar substring, useful when one string is contained within another.
def partial_ratio(s1: str, s2: str) -> int:
"""
Return the ratio of the most similar substring as a number between 0 and 100.
Returns:
int: Partial similarity score 0-100
"""

Usage Example:
from fuzzywuzzy import fuzz
score = fuzz.partial_ratio("this is a test", "this is a test!")
print(score) # 100
score = fuzz.partial_ratio("fuzzy wuzzy", "wuzzy")
print(score) # 100

Compares strings after sorting tokens alphabetically, handling word order variations.
def token_sort_ratio(s1: str, s2: str, force_ascii: bool = True, full_process: bool = True) -> int:
"""
Return similarity between 0 and 100 after sorting tokens.
Parameters:
s1: First string to compare
s2: Second string to compare
force_ascii: Force ASCII conversion (default True)
full_process: Apply full string processing (default True)
Returns:
int: Token sort similarity score 0-100
"""

Usage Example:
from fuzzywuzzy import fuzz
score = fuzz.token_sort_ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear")
print(score) # 100
score = fuzz.token_sort_ratio("new york mets", "mets new york")
print(score) # 100

Combines partial ratio with token sorting for maximum flexibility in word order and substring matching.
def partial_token_sort_ratio(s1: str, s2: str, force_ascii: bool = True, full_process: bool = True) -> int:
"""
Return partial ratio of sorted tokens between 0 and 100.
Parameters:
s1: First string to compare
s2: Second string to compare
force_ascii: Force ASCII conversion (default True)
full_process: Apply full string processing (default True)
Returns:
int: Partial token sort similarity score 0-100
"""

Uses set theory to handle token intersections and differences, ideal for strings with repeated words.
def token_set_ratio(s1: str, s2: str, force_ascii: bool = True, full_process: bool = True) -> int:
"""
Return similarity using token set comparison between 0 and 100.
Compares the intersection and differences of token sets to handle
repeated words and partial matches effectively.
Parameters:
s1: First string to compare
s2: Second string to compare
force_ascii: Force ASCII conversion (default True)
full_process: Apply full string processing (default True)
Returns:
int: Token set similarity score 0-100
"""

Usage Example:
from fuzzywuzzy import fuzz
score = fuzz.token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear")
print(score) # 100
score = fuzz.token_set_ratio("new york yankees", "yankees new york")
print(score) # 100

Combines partial ratio with token set comparison for maximum robustness.
def partial_token_set_ratio(s1: str, s2: str, force_ascii: bool = True, full_process: bool = True) -> int:
"""
Return partial ratio using token set comparison between 0 and 100.
Parameters:
s1: First string to compare
s2: Second string to compare
force_ascii: Force ASCII conversion (default True)
full_process: Apply full string processing (default True)
Returns:
int: Partial token set similarity score 0-100
"""

Fast ratio calculation with optional preprocessing.
def QRatio(s1: str, s2: str, force_ascii: bool = True, full_process: bool = True) -> int:
"""
Quick ratio comparison between two strings.
Parameters:
s1: First string to compare
s2: Second string to compare
force_ascii: Allow only ASCII characters (default True)
full_process: Preprocess both strings before comparing; pass False when the inputs have already been processed, to avoid double processing (default True)
Returns:
int: Quick similarity score 0-100
"""
def UQRatio(s1: str, s2: str, full_process: bool = True) -> int:
"""
Unicode quick ratio - QRatio with force_ascii=False.
Parameters:
s1: First string to compare
s2: Second string to compare
full_process: Process inputs (default True)
Returns:
int: Unicode quick similarity score 0-100
"""

Intelligent combination of multiple algorithms for optimal results.
def WRatio(s1: str, s2: str, force_ascii: bool = True, full_process: bool = True) -> int:
"""
Return weighted similarity between 0 and 100 using multiple algorithms.
Automatically selects the best combination of ratio algorithms based on
string length differences and applies appropriate scaling factors.
Algorithm selection:
- Uses partial algorithms when one string is >1.5x longer than the other
- Scales partial results by 0.9, or by 0.6 when one string is more than 8x longer than the other
- Uses token-based algorithms with 0.95 scaling
- Returns the highest score from all applicable algorithms
Parameters:
s1: First string to compare
s2: Second string to compare
force_ascii: Allow only ASCII characters (default True)
full_process: Process inputs (default True)
Returns:
int: Weighted similarity score 0-100
"""
def UWRatio(s1: str, s2: str, full_process: bool = True) -> int:
"""
Unicode weighted ratio - WRatio with force_ascii=False.
Parameters:
s1: First string to compare
s2: Second string to compare
full_process: Process inputs (default True)
Returns:
int: Unicode weighted similarity score 0-100
"""

Usage Example:
from fuzzywuzzy import fuzz
# WRatio automatically selects the best algorithm
score = fuzz.WRatio("new york yankees", "yankees")
print(score) # Uses partial algorithms due to length difference
score = fuzz.WRatio("new york mets", "new york yankees")
print(score) # Uses token algorithms for similar-length strings

Install with Tessl CLI
npx tessl i tessl/pypi-fuzzywuzzy