Python library for perceptual image hashing with multiple algorithms including average, perceptual, difference, wavelet, color, and crop-resistant hashing
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
Hash container classes that provide comparison operations, string conversion, and mathematical operations for computing similarity between images. These classes encapsulate hash data and provide the interface for all hash operations.
Primary hash container for single perceptual hashes with full comparison and serialization support.
class ImageHash:
    """
    Primary hash container for a single perceptual hash.

    Encapsulates the hash data and provides comparison, string conversion
    and Hamming-distance arithmetic for it.
    """

    def __init__(self, binary_array):
        """
        Initialize ImageHash with binary array.

        Args:
            binary_array (NDArray): Boolean numpy array representing the hash.
                NOTE(review): the subclassing example on this page reads the
                array back through the ``.hash`` attribute, so it is
                presumably stored under that name; confirm against the
                library source.
        """

    def __str__(self):
        """
        Convert hash to hexadecimal string representation.

        Returns:
            str: Hexadecimal string of the hash
        """

    def __repr__(self):
        """
        Return string representation of the binary array.

        Returns:
            str: String representation of hash array
        """

    def __sub__(self, other):
        """
        Calculate Hamming distance between two hashes (``hash1 - hash2``).

        Args:
            other (ImageHash): Other hash to compare against

        Returns:
            int: Hamming distance (number of differing bits)

        Raises:
            TypeError: If other is None or hashes have different shapes.
                NOTE(review): the error-handling example on this page tells
                the two cases apart by the substrings "must not be None" and
                "same shape" in the message; confirm the exact wording.
        """

    def __eq__(self, other):
        """
        Check if two hashes are equal.

        Args:
            other (object): Other hash to compare

        Returns:
            bool: True if hashes are identical
        """

    def __ne__(self, other):
        """
        Check if two hashes are not equal.

        Args:
            other (object): Other hash to compare

        Returns:
            bool: True if hashes are different
        """

    def __hash__(self):
        """
        Return 8-bit integer hash for dictionary keys.

        An 8-bit value can take at most 256 distinct values, so different
        image hashes will frequently share it; dict and set lookups still
        tell them apart through ``__eq__``.

        Returns:
            int: 8-bit hash value for use as dictionary key
        """

    def __len__(self):
        """
        Return bit length of the hash.

        Returns:
            int: Total number of bits in the hash (64 for an 8x8 hash)
        """


# Usage Example:
from PIL import Image
import imagehash

# Create hashes. Each image is opened in a `with` block so its file handle is
# closed as soon as the hash has been computed; the hash objects remain usable
# after the image is closed.
with Image.open('photo1.jpg') as image1:
    hash1 = imagehash.average_hash(image1)
with Image.open('photo2.jpg') as image2:
    hash2 = imagehash.average_hash(image2)

# Basic operations
print(f"Hash 1: {hash1}")  # String representation
print(f"Hash length: {len(hash1)}")  # Bit length (64 for 8x8 hash)

# Comparison operations
distance = hash1 - hash2  # Hamming distance
are_equal = hash1 == hash2  # Exact equality
are_different = hash1 != hash2  # Inequality
print(f"Hamming distance: {distance}")
print(f"Are equal: {are_equal}")

# Use as dictionary key. If the two hashes compare equal they are the same
# key, and the second filename replaces the first.
hash_dict = {hash1: 'photo1.jpg', hash2: 'photo2.jpg'}
filename = hash_dict.get(hash1)  # Retrieve using hash as key

# Hash comparison with tolerance
similarity_threshold = 5
are_similar = distance < similarity_threshold

Container for multiple hashes used in crop-resistant hashing, with advanced matching capabilities.
class ImageMultiHash:
    """
    Container for multiple hashes used in crop-resistant hashing.

    Holds one ImageHash per image segment and offers matching with
    configurable thresholds in addition to the usual comparison operators.
    """

    def __init__(self, hashes):
        """
        Initialize with list of ImageHash objects.

        Args:
            hashes (list[ImageHash]): List of individual segment hashes
        """

    def __eq__(self, other):
        """
        Check equality using matches method.

        Because equality is delegated to ``matches``, ``==`` is a threshold
        based match rather than a bit-for-bit comparison.
        NOTE(review): presumably ``matches`` is called with its default
        thresholds; confirm against the library source.

        Args:
            other (object): Other multi-hash to compare

        Returns:
            bool: True if hashes match
        """

    def __ne__(self, other):
        """
        Check inequality.

        Args:
            other (object): Other multi-hash to compare

        Returns:
            bool: True if hashes don't match
        """

    def __sub__(self, other, hamming_cutoff=None, bit_error_rate=None):
        """
        Calculate distance score between multi-hashes.

        The operator form ``a - b`` can only supply ``other``; to override a
        threshold, call the method explicitly, e.g.
        ``a.__sub__(b, bit_error_rate=0.1)``.

        Args:
            other (ImageMultiHash): Other multi-hash to compare
            hamming_cutoff (float, optional): Maximum hamming distance threshold
            bit_error_rate (float, optional): Percentage of bits that can differ
                (default: 0.25). NOTE(review): the signature default is None,
                so 0.25 is presumably substituted when the argument is left
                out; confirm.

        Returns:
            float: Distance score (lower = more similar)
        """

    def __hash__(self):
        """
        Return hash of tuple of segment hashes.

        NOTE(review): ``__eq__`` is a threshold-based match while this value
        is derived from the exact segment hashes, so two multi-hashes that
        compare equal may still hash differently and miss each other in a
        dict or set lookup; verify before relying on fuzzy key lookups.

        Returns:
            int: Hash value for dictionary usage
        """

    def __str__(self):
        """
        Return comma-separated string of segment hashes.

        Returns:
            str: Comma-separated hex strings
        """

    def __repr__(self):
        """
        Return representation of segment hashes list.

        Returns:
            str: String representation of hash list
        """

    def hash_diff(self, other_hash, hamming_cutoff=None, bit_error_rate=None):
        """
        Get difference metrics between two multi-hashes.

        Args:
            other_hash (ImageMultiHash): Other multi-hash to compare
            hamming_cutoff (float, optional): Maximum hamming distance threshold
            bit_error_rate (float, optional): Bit error rate (default: 0.25).
                NOTE(review): signature default is None; 0.25 is presumably
                the value used when it is omitted; confirm.

        Returns:
            tuple[int, int]: (number_of_matching_segments, sum_of_hamming_distances)
        """

    def matches(self, other_hash, region_cutoff=1, hamming_cutoff=None, bit_error_rate=None):
        """
        Check if multi-hash matches another with configurable thresholds.

        Args:
            other_hash (ImageMultiHash): Other multi-hash to compare
            region_cutoff (int): Minimum matching regions required (default: 1)
            hamming_cutoff (float, optional): Maximum hamming distance per region
            bit_error_rate (float, optional): Bit error rate tolerance
                (default: 0.25). NOTE(review): signature default is None;
                0.25 is presumably the value used when it is omitted; confirm.

        Returns:
            bool: True if hashes match according to criteria
        """

    def best_match(self, other_hashes, hamming_cutoff=None, bit_error_rate=None):
        """
        Find best matching hash from a list of candidates.

        Args:
            other_hashes (list[ImageMultiHash]): List of candidate hashes
            hamming_cutoff (float, optional): Maximum hamming distance threshold
            bit_error_rate (float, optional): Bit error rate tolerance
                (default: 0.25). NOTE(review): signature default is None;
                0.25 is presumably the value used when it is omitted; confirm.

        Returns:
            ImageMultiHash: Best matching hash from the list
        """


# Usage Example:
from PIL import Image
import imagehash

# Create crop-resistant hashes. Each image is opened in a `with` block so its
# file handle is closed once the hash has been computed.
with Image.open('full_photo.jpg') as full_image:
    full_hash = imagehash.crop_resistant_hash(full_image)
with Image.open('cropped_photo.jpg') as cropped_image:
    crop_hash = imagehash.crop_resistant_hash(cropped_image)

# Basic matching
matches = full_hash.matches(crop_hash)
print(f"Images match: {matches}")

# Flexible matching with custom thresholds
strict_match = full_hash.matches(
    crop_hash,
    region_cutoff=2,  # Require at least 2 matching regions
    bit_error_rate=0.15,  # Allow 15% bit differences
)

# Get detailed comparison metrics
num_matches, total_distance = full_hash.hash_diff(crop_hash)
print(f"Matching segments: {num_matches}")
print(f"Total distance: {total_distance}")

# Distance scoring: subtraction yields a distance, so a LOWER value means the
# images are MORE similar. It is named and labelled as a distance to avoid
# reading a large value as "very similar".
distance_score = full_hash - crop_hash
print(f"Distance score: {distance_score}")

# Find best match from multiple candidates
candidates = []
for candidate_path in ('candidate1.jpg', 'candidate2.jpg', 'candidate3.jpg'):
    with Image.open(candidate_path) as candidate_image:
        candidates.append(imagehash.crop_resistant_hash(candidate_image))
best_match = full_hash.best_match(candidates)
print(f"Best match: {best_match}")

# Use as dictionary key
multi_hash_dict = {full_hash: 'full_photo.jpg'}

# Compare one hash against many
target_hash = imagehash.average_hash(target_image)
image_hashes = list(map(imagehash.average_hash, image_list))

# Find all similar images: pair every candidate's position with its Hamming
# distance to the target, keeping only the pairs under the threshold.
indexed_distances = (
    (position, target_hash - candidate)
    for position, candidate in enumerate(image_hashes)
)
similar_images = [
    pair for pair in indexed_distances
    if pair[1] < 10  # similarity threshold
]

# Sort by similarity (smallest distance first)
similar_images.sort(key=lambda pair: pair[1])

# Crop-resistant matching with different tolerance levels
# Strict: need 3+ matching regions, and only 10% bit differences are allowed.
strict_tolerance = multi_hash1.matches(multi_hash2, region_cutoff=3, bit_error_rate=0.10)

# Loose: a single matching region is enough, and 35% bit differences are allowed.
loose_tolerance = multi_hash1.matches(multi_hash2, region_cutoff=1, bit_error_rate=0.35)

# Create custom hash class wrapper
class CustomImageHash(imagehash.ImageHash):
    """ImageHash subclass that carries caller-supplied metadata and can
    express its distance to another hash as a percentage."""

    def __init__(self, binary_array, metadata=None):
        """Store the hash array via the base class and attach ``metadata``.

        A missing or empty ``metadata`` argument is replaced by a new empty
        dict.
        """
        super().__init__(binary_array)
        self.metadata = metadata if metadata else {}

    def similarity_percentage(self, other):
        """Return how similar ``other`` is to this hash, from 0 to 100.

        The Hamming distance to ``other`` is divided by this hash's bit
        length and the complement is scaled to a percentage, so identical
        hashes give 100.
        """
        differing_bits = self - other
        total_bits = len(self)
        differing_fraction = differing_bits / total_bits
        return (1 - differing_fraction) * 100
# Usage
base_hash = imagehash.average_hash(image)
custom_hash = CustomImageHash(
    base_hash.hash,
    metadata={'filename': 'image.jpg', 'algorithm': 'average'},
)

# Hash comparison with error handling
try:
    distance = hash1 - hash2
except TypeError as error:
    # The two failure modes are told apart by the text of the message.
    message = str(error)
    if "must not be None" in message:
        print("One of the hashes is None")
    elif "same shape" in message:
        print("Hashes have different sizes/shapes")
    else:
        # Not a failure this example knows how to report; let it propagate.
        raise
# Safe hash comparison
def safe_compare_hashes(hash1, hash2):
    """Return ``hash1 - hash2``, or ``None`` when the pair cannot be compared.

    ``None`` is returned when either argument is ``None`` and when the
    subtraction raises ``TypeError``.
    """
    both_present = hash1 is not None and hash2 is not None
    if not both_present:
        return None
    try:
        difference = hash1 - hash2
    except TypeError:
        # Incompatible hash types/sizes
        difference = None
    return difference