A library for summarizing data in streams for which it is infeasible to store all events
—
Fast, well-distributed hash functions optimized for use in probabilistic data structures and general hash-based lookup. These implementations provide high-quality hash values with good distribution properties and performance characteristics.
Fast, non-cryptographic hash function suitable for general hash-based lookup with excellent distribution properties and performance.
/**
* MurmurHash implementation - fast, well-distributed hash function
*/
public class MurmurHash {
/**
* Hash any object using its toString() method
* @param o object to hash
* @return 32-bit hash value
*/
public static int hash(Object o);
/**
* Hash byte array
* @param data byte array to hash
* @return 32-bit hash value
*/
public static int hash(byte[] data);
/**
* Hash byte array with seed
* @param data byte array to hash
* @param seed seed value for hash function
* @return 32-bit hash value
*/
public static int hash(byte[] data, int seed);
/**
* Hash byte array with length and seed
* @param data byte array to hash
* @param length number of bytes to hash
* @param seed seed value for hash function
* @return 32-bit hash value
*/
public static int hash(byte[] data, int length, int seed);
/**
* Hash long value directly
* @param data long value to hash
* @return 32-bit hash value
*/
public static int hashLong(long data);
/**
* 64-bit hash of object using its toString() method
* @param o object to hash
* @return 64-bit hash value
*/
public static long hash64(Object o);
/**
* 64-bit hash of byte array
* @param data byte array to hash
* @param length number of bytes to hash
* @return 64-bit hash value
*/
public static long hash64(final byte[] data, int length);
/**
* 64-bit hash of byte array with seed
* @param data byte array to hash
* @param length number of bytes to hash
* @param seed seed value for hash function
* @return 64-bit hash value
*/
public static long hash64(final byte[] data, int length, int seed);
}Usage Examples:
import com.clearspring.analytics.hash.MurmurHash;
// Hash strings and objects
String text = "hello world";
int hash1 = MurmurHash.hash(text);
int hash2 = MurmurHash.hash(text.getBytes());
// Hash with custom seed for different hash functions
int hash3 = MurmurHash.hash(text.getBytes(), 12345);
// Hash long values directly
long value = 1234567890L;
int longHash = MurmurHash.hashLong(value);
// Get 64-bit hashes for more bits and less collisions
long hash64 = MurmurHash.hash64(text);
long hash64WithSeed = MurmurHash.hash64(text.getBytes(), text.length(), 54321);
System.out.println("32-bit hash: " + hash1);
System.out.println("64-bit hash: " + hash64);Fast, well-distributed, cross-platform hash functions based on Bob Jenkins' lookup3 algorithm with variations optimized for different use cases.
/**
* Lookup3Hash implementation based on Bob Jenkins' lookup3 algorithm
*/
public class Lookup3Hash {
/**
* Java implementation of hashword from lookup3.c
* @param k array of integers to hash
* @param offset starting offset in array
* @param length number of integers to hash
* @param initval initial value (seed)
* @return 32-bit hash value
*/
public static int lookup3(int[] k, int offset, int length, int initval);
/**
* Modified lookup3 with biased initial value for ycs (Yahoo Cloud Serving)
* @param k array of integers to hash
* @param offset starting offset in array
* @param length number of integers to hash
* @param initval initial value (seed)
* @return 32-bit hash value
*/
public static int lookup3ycs(int[] k, int offset, int length, int initval);
/**
* Hash character sequence using lookup3ycs algorithm
* @param s character sequence to hash
* @param start starting character index
* @param end ending character index (exclusive)
* @param initval initial value (seed)
* @return 32-bit hash value
*/
public static int lookup3ycs(CharSequence s, int start, int end, int initval);
/**
* 64-bit version of lookup3ycs for character sequences
* @param s character sequence to hash
* @param start starting character index
* @param end ending character index (exclusive)
* @param initval initial value (seed)
* @return 64-bit hash value
*/
public static long lookup3ycs64(CharSequence s, int start, int end, long initval);
/**
* Convenience method: 64-bit hash of entire character sequence
* @param s character sequence to hash
* @return 64-bit hash value using default parameters
*/
public static long lookup3ycs64(CharSequence s);
}Usage Examples:
import com.clearspring.analytics.hash.Lookup3Hash;
// Hash integer arrays
int[] data = {1, 2, 3, 4, 5};
int hash = Lookup3Hash.lookup3(data, 0, data.length, 0);
// Hash strings with different variants
String text = "sample text";
int hashYcs = Lookup3Hash.lookup3ycs(text.toCharArray(), 0, text.length(), 0);
// Hash substrings
String longText = "this is a longer string for testing";
int substringHash = Lookup3Hash.lookup3ycs(longText, 5, 15, 12345);
// Get 64-bit hashes for better distribution
long hash64 = Lookup3Hash.lookup3ycs64(text, 0, text.length(), 0);
long simpleHash64 = Lookup3Hash.lookup3ycs64(text);
System.out.println("Lookup3 hash: " + hash);
System.out.println("64-bit hash: " + hash64);// For Bloom filters - need multiple independent hash functions
public class MultiHashBloomFilter {
private static final int[] SEEDS = {1, 3, 7, 11, 13, 17, 19, 23};
private int[] getHashValues(String key, int numHashes) {
int[] hashes = new int[numHashes];
byte[] bytes = key.getBytes();
for (int i = 0; i < numHashes; i++) {
hashes[i] = Math.abs(MurmurHash.hash(bytes, SEEDS[i]));
}
return hashes;
}
}// Use 64-bit hashes when avoiding collisions is critical
public class DistributedHashTable {
private static final int BUCKET_COUNT = 1000000;
public int getBucket(String key) {
// Use 64-bit hash to minimize collisions
long hash = MurmurHash.hash64(key);
// Map to bucket using modulo (take absolute value first)
return Math.abs((int)(hash % BUCKET_COUNT));
}
}// Create multiple hash functions with different seeds
public class ConsistentHashRing {
private static final int VIRTUAL_NODES = 100;
private void addNode(String nodeId) {
for (int i = 0; i < VIRTUAL_NODES; i++) {
// Use different seeds to create virtual nodes
int hash = MurmurHash.hash(nodeId.getBytes(), i);
ring.put(hash, nodeId);
}
}
}// Different ways to hash strings - choose based on performance needs
public class StringHashingComparison {
public void demonstrateHashingMethods(String text) {
// Method 1: Hash object directly (uses toString())
int hash1 = MurmurHash.hash(text);
// Method 2: Convert to bytes first (more control)
byte[] bytes = text.getBytes(StandardCharsets.UTF_8);
int hash2 = MurmurHash.hash(bytes);
// Method 3: Use Lookup3 for character sequences (avoids byte conversion)
int hash3 = Lookup3Hash.lookup3ycs(text, 0, text.length(), 0);
// Method 4: 64-bit for high quality
long hash4 = MurmurHash.hash64(bytes, bytes.length);
System.out.println("Object hash: " + hash1);
System.out.println("Byte hash: " + hash2);
System.out.println("Lookup3 hash: " + hash3);
System.out.println("64-bit hash: " + hash4);
}
}// Optimize for performance in tight loops
public class PerformanceOptimizedHashing {
// Pre-allocate byte arrays to avoid garbage collection
private final byte[] reuseableBuffer = new byte[1024];
public int fastHash(String input) {
// For short strings, Lookup3 on character sequence is fastest
if (input.length() < 50) {
return Lookup3Hash.lookup3ycs(input, 0, input.length(), 0);
}
// For longer strings, convert to bytes once and reuse
byte[] bytes = input.getBytes(StandardCharsets.UTF_8);
return MurmurHash.hash(bytes, bytes.length, 0);
}
public int hashLongValue(long value) {
// Specialized method for long values
return MurmurHash.hashLong(value);
}
}// Partition data across multiple processors/storage locations
public class DataPartitioner {
private final int partitionCount;
public DataPartitioner(int partitionCount) {
this.partitionCount = partitionCount;
}
public int getPartition(String key) {
// Use high-quality hash to ensure even distribution
int hash = MurmurHash.hash(key.getBytes());
// Map to partition (ensure positive result)
return Math.abs(hash) % partitionCount;
}
public int getPartition(long key) {
// Specialized version for numeric keys
int hash = MurmurHash.hashLong(key);
return Math.abs(hash) % partitionCount;
}
}Use MurmurHash when:
Use Lookup3Hash when:
Use 32-bit hashes when:
Use 64-bit hashes when:
MurmurHash:
Lookup3Hash:
Both hash functions provide:
These properties make them suitable for use in:
Install with Tessl CLI
npx tessl i tessl/maven-com-clearspring-analytics--stream