Python extension for MurmurHash (MurmurHash3), a set of fast and robust hash functions.
—
hashlib-compatible hasher classes for incremental hashing of large datasets and streaming operations. These classes allow you to hash data incrementally, making them ideal for processing large files, network streams, or when you don't have all data available at once.
Abstract base class defining the common interface for all streaming hashers.
class Hasher:
    """Base class for streaming MurmurHash3 hashers."""

    def __init__(self, seed: int = 0) -> None:
        """
        Initialize hasher with optional seed.

        Args:
            seed: Seed value for hash computation (default: 0)
        """

    # NOTE(review): "Hashable" is the package's buffer-type alias (bytes-like /
    # array-like input), not collections.abc.Hashable — defined elsewhere in the
    # package. Quoted to defer evaluation so the class can be created even when
    # the alias is not yet imported; confirm against the package's type stubs.
    def update(self, input: "Hashable") -> None:
        """
        Update hasher with new data.

        Args:
            input: Data to add to hash (bytes, bytearray, memoryview, or array-like)

        Raises:
            TypeError: If input is a string (strings must be encoded first)
        """

    def digest(self) -> bytes:
        """
        Get hash digest as bytes.

        Returns:
            Hash digest as bytes
        """

    def sintdigest(self) -> int:
        """
        Get hash as signed integer.

        Returns:
            Hash value as signed integer
        """

    def uintdigest(self) -> int:
        """
        Get hash as unsigned integer.

        Returns:
            Hash value as unsigned integer
        """

    # Forward reference quoted: "Hasher" is not bound yet while the class body
    # is being executed, so a bare annotation would raise NameError.
    def copy(self) -> "Hasher":
        """
        Create a copy of the hasher's current state.

        Returns:
            New hasher instance with identical state
        """

    @property
    def digest_size(self) -> int:
        """
        Get digest size in bytes.

        Returns:
            Number of bytes in digest output
        """

    @property
    def block_size(self) -> int:
        """
        Get internal block size in bytes.

        Returns:
            Number of bytes processed in each internal block
        """

    @property
    def name(self) -> str:
        """
        Get hasher algorithm name.

        Returns:
            String identifying the hash algorithm
        """


# Streaming hasher for 32-bit MurmurHash3 computation.
class mmh3_32(Hasher):
    r"""32-bit MurmurHash3 streaming hasher.

    Properties:
        digest_size: 4 bytes
        block_size: 12 bytes
        name: "mmh3_32"

    Example usage:
        import mmh3

        # Basic streaming hashing
        hasher = mmh3.mmh3_32()
        hasher.update(b"foo")
        hasher.update(b"bar")

        # Get results in different formats
        digest = hasher.digest()            # b'\x8d\x8f\xe7\xfd'
        signed_int = hasher.sintdigest()    # -156908512
        unsigned_int = hasher.uintdigest()  # 4138058784

        # With custom seed
        hasher = mmh3.mmh3_32(seed=42)
        hasher.update(b"Hello, world!")
        result = hasher.sintdigest()  # Hash with seed

        # Copy hasher state
        hasher1 = mmh3.mmh3_32()
        hasher1.update(b"partial data")
        hasher2 = hasher1.copy()       # Copy current state
        hasher2.update(b" more data")  # Continue from copy
    """


# Streaming hasher for 128-bit MurmurHash3 optimized for x64 architectures.
class mmh3_x64_128(Hasher):
    """128-bit MurmurHash3 streaming hasher optimized for x64 architectures.

    Properties:
        digest_size: 16 bytes
        block_size: 32 bytes
        name: "mmh3_x64_128"

    Example usage:
        import mmh3

        # 128-bit streaming hashing (x64 optimized)
        hasher = mmh3.mmh3_x64_128(seed=42)
        hasher.update(b"foo")
        hasher.update(b"bar")

        # Get results in various formats
        digest = hasher.digest()            # 16-byte digest
        signed_int = hasher.sintdigest()    # 128-bit signed integer
        unsigned_int = hasher.uintdigest()  # 128-bit unsigned integer

        # Get as tuple of 64-bit integers
        signed_tuple = hasher.stupledigest()    # (7689522670935629698, -159584473158936081)
        unsigned_tuple = hasher.utupledigest()  # (7689522670935629698, 18287159600550615535)

        # Process large streaming data
        hasher = mmh3.mmh3_x64_128()
        for chunk in data_stream:
            hasher.update(chunk)
        final_hash = hasher.digest()
    """

    def stupledigest(self) -> tuple[int, int]:
        """
        Get hash as tuple of two signed 64-bit integers.

        Returns:
            Tuple of two signed 64-bit integers representing the 128-bit hash
        """

    def utupledigest(self) -> tuple[int, int]:
        """
        Get hash as tuple of two unsigned 64-bit integers.

        Returns:
            Tuple of two unsigned 64-bit integers representing the 128-bit hash
        """


# Streaming hasher for 128-bit MurmurHash3 optimized for x86 architectures.
class mmh3_x86_128(Hasher):
    """128-bit MurmurHash3 streaming hasher optimized for x86 architectures.

    Properties:
        digest_size: 16 bytes
        block_size: 32 bytes
        name: "mmh3_x86_128"

    Example usage:
        import mmh3

        # 128-bit streaming hashing (x86 optimized)
        hasher = mmh3.mmh3_x86_128(seed=123)
        hasher.update(b"data chunk 1")
        hasher.update(b"data chunk 2")

        # Get results (same interface as x64 version)
        digest = hasher.digest()
        signed_tuple = hasher.stupledigest()
        unsigned_tuple = hasher.utupledigest()

    Note:
        Architecture-specific optimization:
        use mmh3_x86_128 on 32-bit systems for better performance;
        use mmh3_x64_128 on 64-bit systems for better performance.
    """

    def stupledigest(self) -> tuple[int, int]:
        """
        Get hash as tuple of two signed 64-bit integers.

        Returns:
            Tuple of two signed 64-bit integers representing the 128-bit hash
        """

    def utupledigest(self) -> tuple[int, int]:
        """
        Get hash as tuple of two unsigned 64-bit integers.

        Returns:
            Tuple of two unsigned 64-bit integers representing the 128-bit hash
        """


import mmh3
def hash_large_file(filename):
    """Incrementally hash a file with 128-bit MurmurHash3 (x64 variant).

    Reads the file in fixed-size chunks so arbitrarily large files can be
    hashed in constant memory.

    Args:
        filename: Path of the file to hash.

    Returns:
        16-byte digest of the file contents.
    """
    hasher = mmh3.mmh3_x64_128()
    with open(filename, 'rb') as f:
        while chunk := f.read(8192):  # 8KB chunks
            hasher.update(chunk)
    return hasher.digest()


file_hash = hash_large_file('large_dataset.bin')

import mmh3
def process_data_stream(data_stream):
    """Incrementally hash processed chunks of a stream with seeded 32-bit MurmurHash3.

    Args:
        data_stream: Iterable yielding raw data chunks.

    Returns:
        Unsigned 32-bit hash of all processed chunks.
    """
    hasher = mmh3.mmh3_32(seed=42)
    for data_chunk in data_stream:
        # Process data
        processed = process_chunk(data_chunk)
        # Update hash incrementally
        hasher.update(processed)
    return hasher.uintdigest()


import mmh3
# Process common prefix once
base_hasher = mmh3.mmh3_x64_128()
base_hasher.update(b"common prefix data")

# Branch processing from common state
hasher1 = base_hasher.copy()
hasher1.update(b"branch 1 data")
result1 = hasher1.digest()

hasher2 = base_hasher.copy()
hasher2.update(b"branch 2 data")
result2 = hasher2.digest()

# Hashers only accept binary data types:
- Accepted: bytes, bytearray, memoryview, array-like objects
- Not accepted: str — strings must be encoded first

import mmh3
hasher = mmh3.mmh3_32()

# Correct usage
hasher.update(b"binary data")
hasher.update("text data".encode('utf-8'))  # Encode strings first

# This will raise TypeError
# hasher.update("raw string")  # TypeError: Strings must be encoded before hashing

# All hasher instances are independent and thread-safe. However, individual
# hasher objects should not be shared between threads without proper
# synchronization.
- copy() is lightweight and creates minimal overhead
- update()
- digest(), sintdigest(), uintdigest()
- copy() if needed

Install with Tessl CLI
npx tessl i tessl/pypi-mmh3