CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-imagehash

Python library for perceptual image hashing with multiple algorithms including average, perceptual, difference, wavelet, color, and crop-resistant hashing

Pending
Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Security by Snyk

Pending

The risk profile of this skill

Overview
Eval results
Files

docs/core-classes.md

Core Classes

Hash container classes that provide comparison operations, string conversion, and mathematical operations for computing similarity between images. These classes encapsulate hash data and provide the interface for all hash operations.

Capabilities

ImageHash Class

Primary hash container for single perceptual hashes with full comparison and serialization support.

class ImageHash:
    """
    Container for a single perceptual hash.

    Wraps a boolean array and exposes the hash through Python's operator
    protocols: ``str(h)`` for a hexadecimal string, ``h1 - h2`` for the
    Hamming distance, ``==`` / ``!=`` for exact comparison, ``len(h)`` for
    the number of bits, and ``hash(h)`` so instances can be used as
    dictionary keys or set members.
    """
    def __init__(self, binary_array) -> None:
        """
        Initialize ImageHash with binary array.
        
        Args:
            binary_array (NDArray): Boolean numpy array representing the hash
        """
    
    def __str__(self) -> str:
        """
        Convert hash to hexadecimal string representation.

        This is the form produced by ``str(h)`` and by f-string
        interpolation such as ``f"{h}"``.
        
        Returns:
            str: Hexadecimal string of the hash
        """
    
    def __repr__(self) -> str:
        """
        Return string representation of the binary array.

        Unlike ``__str__``, this shows the underlying array rather than
        the compact hexadecimal form.
        
        Returns:
            str: String representation of hash array
        """
    
    def __sub__(self, other: "ImageHash") -> int:
        """
        Calculate Hamming distance between two hashes.

        Invoked by the binary ``-`` operator (``hash1 - hash2``). A result
        of 0 means every bit agrees; larger values mean more differing
        bits.
        
        Args:
            other (ImageHash): Other hash to compare against
        
        Returns:
            int: Hamming distance (number of differing bits)
        
        Raises:
            TypeError: If other is None or hashes have different shapes
        """
    
    def __eq__(self, other: object) -> bool:
        """
        Check if two hashes are equal.

        This is an exact comparison; use ``-`` with a threshold to test
        for near-duplicates.
        
        Args:
            other (object): Other hash to compare
        
        Returns:
            bool: True if hashes are identical
        """
    
    def __ne__(self, other: object) -> bool:
        """
        Check if two hashes are not equal.
        
        Args:
            other (object): Other hash to compare
        
        Returns:
            bool: True if hashes are different
        """
    
    def __hash__(self) -> int:
        """
        Return 8-bit integer hash for dictionary keys.

        The value is deliberately much shorter than the image hash itself,
        so many distinct image hashes share the same ``hash()`` value.
        Dictionaries and sets still behave correctly because they fall
        back to ``__eq__`` to tell colliding keys apart.
        
        Returns:
            int: 8-bit hash value for use as dictionary key
        """
    
    def __len__(self) -> int:
        """
        Return bit length of the hash.

        For example, a hash computed on an 8x8 grid has a length of 64.
        
        Returns:
            int: Total number of bits in the hash
        """

Usage Example:

from PIL import Image
import imagehash

# Create hashes. Opening each image in a context manager closes the
# underlying file as soon as its hash has been computed, instead of
# leaving that to the garbage collector.
with Image.open('photo1.jpg') as image1:
    hash1 = imagehash.average_hash(image1)
with Image.open('photo2.jpg') as image2:
    hash2 = imagehash.average_hash(image2)

# Basic operations
print(f"Hash 1: {hash1}")            # Hexadecimal string representation
print(f"Hash length: {len(hash1)}")  # Bit length (64 for 8x8 hash)

# Comparison operations
distance = hash1 - hash2           # Hamming distance (number of differing bits)
are_equal = hash1 == hash2         # Exact equality
are_different = hash1 != hash2     # Inequality

print(f"Hamming distance: {distance}")
print(f"Are equal: {are_equal}")

# Use as dictionary key.
# NOTE: if hash1 == hash2 the two keys are the same key, so the dict keeps
# a single entry whose value is 'photo2.jpg'.
hash_dict = {hash1: 'photo1.jpg', hash2: 'photo2.jpg'}
filename = hash_dict.get(hash1)   # Retrieve using hash as key

# Hash comparison with tolerance: treat the images as similar when fewer
# than `similarity_threshold` bits differ
similarity_threshold = 5
are_similar = distance < similarity_threshold

ImageMultiHash Class

Container for multiple hashes used in crop-resistant hashing, with advanced matching capabilities.

class ImageMultiHash:
    """
    Container for the segment hashes of one image, used for
    crop-resistant matching.

    Several methods accept the same pair of tolerance arguments:

    * ``hamming_cutoff`` - maximum Hamming distance for two segments to
      count as matching.
    * ``bit_error_rate`` - tolerated share of differing bits, expressed
      as a fraction (``0.25`` means 25%). The signature default is
      ``None``; the documented effective default is ``0.25``.

    NOTE(review): presumably ``hamming_cutoff`` takes precedence when both
    are supplied - confirm against the implementation.
    """
    def __init__(self, hashes) -> None:
        """
        Initialize with list of ImageHash objects.
        
        Args:
            hashes (list[ImageHash]): List of individual segment hashes
        """
    
    def __eq__(self, other: object) -> bool:
        """
        Check equality using matches method.

        Because this delegates to ``matches``, ``==`` is a tolerance-based
        comparison rather than a bit-for-bit one: two multi-hashes can
        compare equal without being identical.
        
        Args:
            other (object): Other multi-hash to compare
        
        Returns:
            bool: True if hashes match
        """
    
    def __ne__(self, other: object) -> bool:
        """
        Check inequality.
        
        Args:
            other (object): Other multi-hash to compare
        
        Returns:
            bool: True if hashes don't match
        """
    
    def __sub__(self, other, hamming_cutoff=None, bit_error_rate=None) -> float:
        """
        Calculate distance score between multi-hashes.

        The binary ``-`` operator (``a - b``) only ever passes ``other``,
        so the two tolerance arguments can be supplied only by calling
        ``a.__sub__(b, ...)`` explicitly.
        
        Args:
            other (ImageMultiHash): Other multi-hash to compare
            hamming_cutoff (float, optional): Maximum hamming distance threshold
            bit_error_rate (float, optional): Fraction of bits that can differ,
                e.g. 0.25 for 25% (signature default None; documented
                effective default: 0.25)
        
        Returns:
            float: Distance score (lower = more similar)
        """
    
    def __hash__(self) -> int:
        """
        Return hash of tuple of segment hashes.

        Makes instances usable as dictionary keys and set members.
        
        Returns:
            int: Hash value for dictionary usage
        """
    
    def __str__(self) -> str:
        """
        Return comma-separated string of segment hashes.
        
        Returns:
            str: Comma-separated hex strings
        """
    
    def __repr__(self) -> str:
        """
        Return representation of segment hashes list.
        
        Returns:
            str: String representation of hash list
        """
    
    def hash_diff(self, other_hash, hamming_cutoff=None, bit_error_rate=None):
        """
        Get difference metrics between two multi-hashes.

        Use this when the raw numbers behind a match decision are needed
        rather than the boolean from ``matches`` or the combined score
        from ``-``.
        
        Args:
            other_hash (ImageMultiHash): Other multi-hash to compare
            hamming_cutoff (float, optional): Maximum hamming distance threshold
            bit_error_rate (float, optional): Fraction of bits that can differ
                (signature default None; documented effective default: 0.25)
        
        Returns:
            tuple[int, int]: (number_of_matching_segments, sum_of_hamming_distances)
        """
    
    def matches(self, other_hash, region_cutoff=1, hamming_cutoff=None, bit_error_rate=None) -> bool:
        """
        Check if multi-hash matches another with configurable thresholds.

        Raising ``region_cutoff`` or lowering ``bit_error_rate`` makes the
        comparison stricter.
        
        Args:
            other_hash (ImageMultiHash): Other multi-hash to compare
            region_cutoff (int): Minimum matching regions required (default: 1)
            hamming_cutoff (float, optional): Maximum hamming distance per region
            bit_error_rate (float, optional): Fraction of bits that can differ
                (signature default None; documented effective default: 0.25)
        
        Returns:
            bool: True if hashes match according to criteria
        """
    
    def best_match(self, other_hashes, hamming_cutoff=None, bit_error_rate=None) -> "ImageMultiHash":
        """
        Find best matching hash from a list of candidates.

        NOTE(review): presumably "best" means the candidate with the lowest
        distance score to this hash - confirm against the implementation.
        
        Args:
            other_hashes (list[ImageMultiHash]): List of candidate hashes
            hamming_cutoff (float, optional): Maximum hamming distance threshold
            bit_error_rate (float, optional): Fraction of bits that can differ
                (signature default None; documented effective default: 0.25)
        
        Returns:
            ImageMultiHash: Best matching hash from the list
        """

Usage Example:

from PIL import Image
import imagehash

# Create crop-resistant hashes. Each image is opened in a context manager
# so its file is closed as soon as the hash has been computed.
with Image.open('full_photo.jpg') as full_image:
    full_hash = imagehash.crop_resistant_hash(full_image)
with Image.open('cropped_photo.jpg') as cropped_image:
    crop_hash = imagehash.crop_resistant_hash(cropped_image)

# Basic matching
matches = full_hash.matches(crop_hash)
print(f"Images match: {matches}")

# Flexible matching with custom thresholds
strict_match = full_hash.matches(
    crop_hash,
    region_cutoff=2,      # Require at least 2 matching regions
    bit_error_rate=0.15   # Allow 15% bit differences
)

# Get detailed comparison metrics
num_matches, total_distance = full_hash.hash_diff(crop_hash)
print(f"Matching segments: {num_matches}")
print(f"Total distance: {total_distance}")

# Distance scoring: subtraction yields a distance, so LOWER means MORE
# similar (it is not a similarity score)
distance_score = full_hash - crop_hash
print(f"Distance score: {distance_score}")

# Find best match from multiple candidates
candidates = []
for candidate_path in ('candidate1.jpg', 'candidate2.jpg', 'candidate3.jpg'):
    with Image.open(candidate_path) as candidate_image:
        candidates.append(imagehash.crop_resistant_hash(candidate_image))

best_match = full_hash.best_match(candidates)
print(f"Best match: {best_match}")

# Use as dictionary key
multi_hash_dict = {full_hash: 'full_photo.jpg'}

Advanced Usage Patterns

Batch Hash Comparison

# Hash the reference image and every candidate image
target_hash = imagehash.average_hash(target_image)
image_hashes = list(map(imagehash.average_hash, image_list))

# Pair each candidate's position with its Hamming distance to the target
indexed_distances = (
    (index, target_hash - candidate_hash)
    for index, candidate_hash in enumerate(image_hashes)
)

# Keep only candidates under the similarity threshold, closest first
similar_images = sorted(
    (pair for pair in indexed_distances if pair[1] < 10),
    key=lambda pair: pair[1],
)

Hash Tolerance Configuration

# Crop-resistant matching with different tolerance levels.
# multi_hash1 and multi_hash2 are multi-hash objects exposing matches()
# (presumably produced by imagehash.crop_resistant_hash); each call
# returns True when the two hashes match under the given thresholds.

# Strict: many regions must agree, and each may differ only slightly
strict_tolerance = multi_hash1.matches(
    multi_hash2,
    region_cutoff=3,      # Need 3+ matching regions
    bit_error_rate=0.10   # Only 10% bit differences allowed
)

# Loose: a single roughly-matching region is enough
loose_tolerance = multi_hash1.matches(
    multi_hash2,
    region_cutoff=1,      # Only 1 matching region needed
    bit_error_rate=0.35   # Allow 35% bit differences
)

Custom Hash Functions with Classes

# ImageHash subclass that carries extra metadata alongside the hash
class CustomImageHash(imagehash.ImageHash):
    """An ImageHash that also stores caller-supplied metadata."""

    def __init__(self, binary_array, metadata=None):
        """
        Build the hash and attach metadata.

        Args:
            binary_array: Array forwarded unchanged to ImageHash.__init__
            metadata (dict, optional): Extra information to keep with the
                hash; any falsy value (None, empty dict) is replaced by a
                fresh empty dict
        """
        super().__init__(binary_array)
        self.metadata = metadata if metadata else {}

    def similarity_percentage(self, other):
        """
        Express closeness to another hash as a percentage.

        Args:
            other: Hash to compare against

        Returns:
            float: 100 when no bits differ, 0 when every bit differs
        """
        differing_bits = self - other
        total_bits = len(self)
        return (1 - differing_bits / total_bits) * 100

# Usage: wrap the array of an ordinary hash and tag it with metadata
base_hash = imagehash.average_hash(image)
custom_hash = CustomImageHash(
    base_hash.hash,
    metadata={'filename': 'image.jpg', 'algorithm': 'average'},
)

Error Handling

try:
    # Subtraction raises TypeError for a None operand or mismatched shapes
    distance = hash1 - hash2
except TypeError as exc:
    # Inspect the message once to tell the two documented failure modes apart
    message = str(exc)
    if "must not be None" in message:
        print("One of the hashes is None")
    elif "same shape" in message:
        print("Hashes have different sizes/shapes")
    else:
        # Unrecognized TypeError: let it propagate unchanged
        raise

# Safe hash comparison
def safe_compare_hashes(hash1, hash2):
    """
    Subtract two hashes without raising on missing or incompatible input.

    Args:
        hash1: Left operand, or None
        hash2: Right operand, or None

    Returns:
        The result of ``hash1 - hash2``, or None when either operand is
        None or the subtraction raises TypeError.
    """
    result = None
    if hash1 is not None and hash2 is not None:
        try:
            result = hash1 - hash2
        except TypeError:
            # Incompatible hash types/sizes: report "no distance available"
            result = None
    return result

docs

core-classes.md

crop-resistant-hashing.md

hash-conversion.md

hash-generation.md

index.md

tile.json