tessl install tessl/maven-org-apache-spark--spark-unsafe_2-13@3.5.0

Low-level unsafe operations and optimized data structures for Apache Spark's internal memory management and performance-critical operations.
Low-level unsafe operations and optimized data structures for Apache Spark's internal memory management and performance-critical operations. This module provides direct memory access capabilities, UTF-8 string processing, hash algorithms, and specialized array implementations designed for maximum performance in memory-constrained environments.
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-unsafe_2.13</artifactId>
<version>3.5.6</version>
</dependency>

import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.types.UTF8String;
import org.apache.spark.unsafe.memory.MemoryAllocator;
import org.apache.spark.unsafe.memory.MemoryBlock;
import org.apache.spark.unsafe.hash.Murmur3_x86_32;

For array operations:
import org.apache.spark.unsafe.array.LongArray;
import org.apache.spark.unsafe.array.ByteArrayMethods;

import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.types.UTF8String;
import org.apache.spark.unsafe.memory.MemoryAllocator;
// Direct memory allocation
MemoryAllocator allocator = MemoryAllocator.UNSAFE;
MemoryBlock block = allocator.allocate(1024);
// UTF-8 string operations
UTF8String str1 = UTF8String.fromString("Hello");
UTF8String str2 = UTF8String.fromString(" World");
UTF8String result = UTF8String.concat(str1, str2);
// Platform memory operations
long address = Platform.allocateMemory(64);
Platform.putLong(null, address, 12345L);
long value = Platform.getLong(null, address);
Platform.freeMemory(address);
// Hash calculation
Murmur3_x86_32 hasher = new Murmur3_x86_32(42);
int hash = hasher.hashLong(12345L);

The unsafe module is organized into focused capabilities:
Core platform utilities for unsafe memory operations, direct memory access, and JVM intrinsics that bypass standard Java safety mechanisms for maximum performance.
public static int getInt(Object object, long offset);
public static void putInt(Object object, long offset, int value);
public static long allocateMemory(long size);
public static void freeMemory(long address);
public static void copyMemory(Object src, long srcOffset, Object dst, long dstOffset, long length);

Abstract memory allocators supporting both heap and off-heap memory allocation with pooling, debug capabilities, and memory block abstractions for TaskMemoryManager integration.
public interface MemoryAllocator {
MemoryBlock allocate(long size) throws OutOfMemoryError;
void free(MemoryBlock memory);
}
public class MemoryBlock extends MemoryLocation {
public long size();
public void fill(byte value);
}

High-performance UTF-8 encoded string implementation with extensive string manipulation capabilities, optimized for internal Spark operations and zero-copy scenarios.
public final class UTF8String implements Comparable<UTF8String>, Externalizable {
public static UTF8String fromString(String str);
public static UTF8String concat(UTF8String... inputs);
public UTF8String substring(int start, int until);
public boolean contains(UTF8String substring);
public String toString();
}

Optimized array implementations and manipulation utilities including long arrays supporting both on-heap and off-heap memory, and byte array operations with pattern matching.
public final class LongArray {
public LongArray(MemoryBlock memory);
public long get(int index);
public void set(int index, long value);
}
public static boolean arrayEquals(Object leftBase, long leftOffset, Object rightBase, long rightOffset, long length);

High-performance hash function implementations including 32-bit Murmur3 hasher and Hive-compatible hashing for data compatibility across systems.
public final class Murmur3_x86_32 {
public Murmur3_x86_32(int seed);
public int hashInt(int input);
public int hashLong(long input);
public int hashUnsafeWords(Object base, long offset, int lengthInBytes);
}

Specialized data types including calendar intervals, byte array utilities, bitset operations, and date/time constants for temporal calculations.
public final class CalendarInterval implements Serializable {
public CalendarInterval(int months, int days, long microseconds);
public final int months;
public final int days;
public final long microseconds;
}

Efficient bitset manipulation utilities for working with fixed-size uncompressed bitsets in memory, designed for columnar data processing and efficient bit-level operations.
public static void set(Object baseObject, long baseOffset, int index);
public static void unset(Object baseObject, long baseOffset, int index);
public static boolean isSet(Object baseObject, long baseOffset, int index);
public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInWords);
public static int nextSetBit(Object baseObject, long baseOffset, int fromIndex, int bitsetSizeInWords);

Abstract iterator base class for traversing key-value pairs in Spark's unsafe operations with proper resource management and type safety.
public abstract class KVIterator<K, V> {
public abstract boolean next() throws IOException;
public abstract K getKey();
public abstract V getValue();
public abstract void close();
}

High-performance utilities for byte array operations including memory copying, binary comparison, substring extraction, concatenation, and padding operations.
public static void writeToMemory(byte[] src, Object target, long targetOffset);
public static long getPrefix(byte[] bytes);
public static int compareBinary(byte[] leftBase, byte[] rightBase);
public static byte[] subStringSQL(byte[] bytes, int pos, int len);
public static byte[] concat(byte[]... inputs);
public static byte[] lpad(byte[] bytes, int len, byte[] pad);
public static byte[] rpad(byte[] bytes, int len, byte[] pad);