Low-level memory management and unsafe operations library for Apache Spark with direct memory access and optimized data structures
Efficient array implementations and utility methods for byte arrays and long arrays with support for both heap and off-heap memory storage, providing optimized operations for Spark's internal data processing needs.
Memory-efficient array of long values that can be backed by either heap or off-heap memory, designed for high-performance numerical operations.
/**
 * Array of {@code long} values stored in a {@link MemoryBlock}, supporting both
 * on-heap and off-heap memory. Each element occupies 8 bytes of the block.
 */
public final class LongArray {
  /**
   * Create a LongArray backed by a memory block.
   *
   * @param memory MemoryBlock providing the storage; its size in bytes determines
   *               how many 8-byte elements the array can hold
   */
  public LongArray(MemoryBlock memory);

  /**
   * Get the underlying memory block.
   *
   * @return MemoryBlock backing this array
   */
  public MemoryBlock memoryBlock();

  /**
   * Get the base object used for memory access.
   *
   * @return Base object for on-heap storage, or {@code null} for off-heap storage
   */
  public Object getBaseObject();

  /**
   * Get the base offset of the first element.
   *
   * @return Offset within the base object (on-heap) or the direct memory address (off-heap)
   */
  public long getBaseOffset();

  /**
   * Get the number of long elements in the array.
   *
   * @return Array size in elements (not bytes)
   */
  public long size();

  /**
   * Fill the entire array with zeros.
   */
  public void zeroOut();

  /**
   * Set the value at the specified index.
   *
   * @param index Zero-based element index; expected to be in {@code [0, size())}
   * @param value Long value to store
   */
  public void set(int index, long value);

  /**
   * Get the value at the specified index.
   *
   * @param index Zero-based element index; expected to be in {@code [0, size())}
   * @return Long value at index
   */
  public long get(int index);
}
Usage Examples:
import org.apache.spark.unsafe.array.LongArray;
import org.apache.spark.unsafe.memory.*;
// Create LongArray with heap storage
MemoryAllocator allocator = MemoryAllocator.HEAP;
MemoryBlock block = allocator.allocate(800); // 100 longs * 8 bytes
LongArray array = new LongArray(block);
try {
  // Initialize array
  array.zeroOut();
  // Set values
  for (int i = 0; i < 10; i++) {
    array.set(i, i * 10L);
  }
  // Read values
  for (int i = 0; i < 10; i++) {
    long value = array.get(i);
    System.out.println("array[" + i + "] = " + value);
  }
  // Get array properties
  long size = array.size();               // number of elements, not bytes
  Object baseObj = array.getBaseObject(); // null only for off-heap storage
} finally {
  // Always release the block, even if the code above throws
  allocator.free(block);
}
// Create LongArray with off-heap storage
MemoryAllocator unsafeAllocator = MemoryAllocator.UNSAFE;
MemoryBlock offHeapBlock = unsafeAllocator.allocate(800);
try {
  LongArray offHeapArray = new LongArray(offHeapBlock);
  // ... use array ...
} finally {
  // Off-heap memory is not garbage collected, so it must be freed explicitly
  unsafeAllocator.free(offHeapBlock);
}
Comprehensive utility methods for byte array operations including comparisons, searching, and memory-related calculations.
/**
 * Utility methods for byte array operations with unsafe optimizations.
 * All members are static.
 */
public class ByteArrayMethods {
  // Constants

  /**
   * Maximum safe array length for rounded arrays, i.e. the largest byte count
   * that can still be rounded up to a word boundary and allocated as an array.
   */
  public static final int MAX_ROUNDED_ARRAY_LENGTH;

  // Memory alignment utilities

  /**
   * Get the next power of 2 greater than or equal to num.
   *
   * @param num Input number
   * @return {@code num} itself if it is already a power of 2, otherwise the next larger power of 2
   */
  public static long nextPowerOf2(long num);

  /**
   * Round a byte count up to the next word boundary (a multiple of 8 bytes).
   *
   * @param numBytes Number of bytes
   * @return Smallest multiple of 8 that is greater than or equal to {@code numBytes}
   */
  public static int roundNumberOfBytesToNearestWord(int numBytes);

  /**
   * Round a byte count up to the next word boundary (a multiple of 8 bytes), long version.
   *
   * @param numBytes Number of bytes
   * @return Smallest multiple of 8 that is greater than or equal to {@code numBytes}
   */
  public static long roundNumberOfBytesToNearestWord(long numBytes);

  // Array comparison operations

  /**
   * Compare two memory regions for byte-wise equality using unsafe operations.
   * Exactly {@code length} bytes are compared; callers must check beforehand
   * that both regions are at least that long.
   *
   * @param leftBase Left array base object (null for off-heap)
   * @param leftOffset Left array offset or address
   * @param rightBase Right array base object (null for off-heap)
   * @param rightOffset Right array offset or address
   * @param length Number of bytes to compare
   * @return true if the first {@code length} bytes of both regions are equal
   */
  public static boolean arrayEquals(Object leftBase, long leftOffset, Object rightBase, long rightOffset, long length);

  // Array search operations

  /**
   * Check if a byte array contains a sub-array.
   *
   * @param arr Main byte array
   * @param sub Sub-array to search for
   * @return true if sub-array is found
   */
  public static boolean contains(byte[] arr, byte[] sub);

  /**
   * Check if an array starts with the target sub-array.
   *
   * @param array Main byte array
   * @param target Target sub-array
   * @return true if array starts with target
   */
  public static boolean startsWith(byte[] array, byte[] target);

  /**
   * Check if an array ends with the target sub-array.
   *
   * @param array Main byte array
   * @param target Target sub-array
   * @return true if array ends with target
   */
  public static boolean endsWith(byte[] array, byte[] target);

  /**
   * Check if a sub-array matches at a specific position.
   *
   * @param arr Main byte array
   * @param sub Sub-array to match
   * @param pos Zero-based position in {@code arr} at which the match must start
   * @return true if sub-array matches at position
   */
  public static boolean matchAt(byte[] arr, byte[] sub, int pos);
}
Usage Examples:
import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.array.ByteArrayMethods;
// Memory alignment calculations
long size = 1000;
long powerOf2 = ByteArrayMethods.nextPowerOf2(size); // Next power of 2 >= 1000, i.e. 1024
int alignedSize = ByteArrayMethods.roundNumberOfBytesToNearestWord(37); // Rounded up to a multiple of 8, i.e. 40
// Array comparisons using unsafe operations
byte[] array1 = "Hello World".getBytes();
byte[] array2 = "Hello World".getBytes();
// Both arrays have the same length here; check lengths first when they may differ
boolean areEqual = ByteArrayMethods.arrayEquals(
    array1, Platform.BYTE_ARRAY_OFFSET,
    array2, Platform.BYTE_ARRAY_OFFSET,
    array1.length
);
// Array searching operations
byte[] mainArray = "Hello World Program".getBytes();
byte[] searchFor = "World".getBytes();
boolean contains = ByteArrayMethods.contains(mainArray, searchFor);
boolean starts = ByteArrayMethods.startsWith(mainArray, "Hello".getBytes());
boolean ends = ByteArrayMethods.endsWith(mainArray, "Program".getBytes());
boolean matches = ByteArrayMethods.matchAt(mainArray, "World".getBytes(), 6); // position is 0-based
System.out.println("Contains 'World': " + contains);
System.out.println("Starts with 'Hello': " + starts);
System.out.println("Ends with 'Program': " + ends);
System.out.println("'World' at position 6: " + matches);
Specialized utilities for byte array operations including comparison, manipulation, and SQL-style string operations.
/**
 * Utility methods for byte array operations and comparisons.
 * All members are static.
 */
public final class ByteArray {
  // Constants

  /**
   * Empty (zero-length) byte array constant.
   */
  public static final byte[] EMPTY_BYTE;

  // Memory operations

  /**
   * Write a byte array to a memory location.
   *
   * @param src Source byte array
   * @param target Target object (null for off-heap)
   * @param targetOffset Target offset within {@code target}, or direct address for off-heap
   */
  public static void writeToMemory(byte[] src, Object target, long targetOffset);

  /**
   * Get the sorting prefix of a byte array.
   *
   * @param bytes Byte array
   * @return Long prefix for sorting
   */
  public static long getPrefix(byte[] bytes);

  // Comparison operations

  /**
   * Compare two byte arrays lexicographically.
   *
   * @param leftBase Left byte array
   * @param rightBase Right byte array
   * @return Negative, zero, or positive value when the left array sorts before,
   *         equal to, or after the right array
   */
  public static int compareBinary(byte[] leftBase, byte[] rightBase);

  // String-like operations

  /**
   * Extract a substring from a byte array (SQL-style, 1-based).
   *
   * @param bytes Source byte array
   * @param pos Starting position (1-based)
   * @param len Length of substring in bytes
   * @return Substring as byte array
   */
  public static byte[] subStringSQL(byte[] bytes, int pos, int len);

  /**
   * Concatenate multiple byte arrays in argument order.
   *
   * @param inputs Byte arrays to concatenate
   * @return Concatenated byte array
   */
  public static byte[] concat(byte[]... inputs);

  /**
   * Left-pad a byte array to the specified length.
   *
   * @param bytes Source byte array
   * @param len Target length in bytes
   * @param pad Padding bytes, repeated as needed in front of {@code bytes}
   * @return Left-padded byte array
   */
  public static byte[] lpad(byte[] bytes, int len, byte[] pad);

  /**
   * Right-pad a byte array to the specified length.
   *
   * @param bytes Source byte array
   * @param len Target length in bytes
   * @param pad Padding bytes, repeated as needed after {@code bytes}
   * @return Right-padded byte array
   */
  public static byte[] rpad(byte[] bytes, int len, byte[] pad);
}
Usage Examples:
import org.apache.spark.unsafe.types.ByteArray;
import org.apache.spark.unsafe.Platform;
// Memory operations
byte[] data = "Hello".getBytes();
byte[] buffer = new byte[100];
ByteArray.writeToMemory(data, buffer, Platform.BYTE_ARRAY_OFFSET);
long prefix = ByteArray.getPrefix(data);
// Array comparison
byte[] array1 = "apple".getBytes();
byte[] array2 = "banana".getBytes();
int comparison = ByteArray.compareBinary(array1, array2); // negative
// String-like operations on byte arrays
byte[] source = "Hello World".getBytes();
// Extract substring (SQL-style, 1-based indexing)
byte[] substring = ByteArray.subStringSQL(source, 7, 5); // "World"
// Concatenation
byte[] part1 = "Hello".getBytes();
byte[] part2 = " ".getBytes();
byte[] part3 = "World".getBytes();
byte[] concatenated = ByteArray.concat(part1, part2, part3); // "Hello World"
// Padding operations
byte[] text = "Hi".getBytes();
byte[] spaces = " ".getBytes();
byte[] leftPadded = ByteArray.lpad(text, 10, spaces); // "        Hi" (8 spaces, then "Hi")
byte[] rightPadded = ByteArray.rpad(text, 10, spaces); // "Hi        " ("Hi", then 8 spaces)
// Working with empty arrays
byte[] empty = ByteArray.EMPTY_BYTE;
System.out.println("Empty array length: " + empty.length);
ByteArrayMethods provides utilities for optimizing memory access through proper alignment:
// Calculate optimal buffer sizes
int originalSize = 1000;
// Rounds up to a multiple of 8; 1000 is already aligned, so this stays 1000
int wordAligned = ByteArrayMethods.roundNumberOfBytesToNearestWord(originalSize);
// Use power-of-2 sizes for better memory allocation (1024 for 1000)
long optimalSize = ByteArrayMethods.nextPowerOf2(originalSize);
For maximum performance, use unsafe array comparison methods:
import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.array.ByteArrayMethods;
// Fast array comparison using unsafe operations
byte[] array1 = getData1();
byte[] array2 = getData2();
// arrayEquals compares exactly the number of bytes it is given, so arrays of
// different length must be rejected first; comparing only the common prefix
// would report "abc" and "abcdef" as equal.
boolean equal = array1.length == array2.length
    && ByteArrayMethods.arrayEquals(
        array1, Platform.BYTE_ARRAY_OFFSET,
        array2, Platform.BYTE_ARRAY_OFFSET,
        array1.length
    );
When working with LongArray, consider memory allocation strategy:
// For temporary arrays, use heap allocation
MemoryAllocator heapAllocator = MemoryAllocator.HEAP;
// Multiply by 8L so the byte count is computed in long arithmetic and cannot overflow int
MemoryBlock heapBlock = heapAllocator.allocate(elementCount * 8L);
LongArray heapArray = new LongArray(heapBlock);
// For long-lived arrays, consider off-heap allocation
MemoryAllocator offHeapAllocator = MemoryAllocator.UNSAFE;
MemoryBlock offHeapBlock = offHeapAllocator.allocate(elementCount * 8L);
LongArray offHeapArray = new LongArray(offHeapBlock);
Always ensure proper cleanup of allocated memory:
import org.apache.spark.unsafe.array.LongArray;
import org.apache.spark.unsafe.memory.*;
/**
 * Allocates a heap-backed LongArray of {@code size} elements, fills it with
 * {@code 2 * index}, hands it to {@code processData}, and always frees the block.
 *
 * @param size Number of long elements to allocate
 */
public void processLongArray(int size) {
  MemoryAllocator allocator = MemoryAllocator.HEAP;
  MemoryBlock block = null;
  try {
    // 8L forces long arithmetic: size * 8 would overflow int for size > Integer.MAX_VALUE / 8
    block = allocator.allocate(size * 8L);
    LongArray array = new LongArray(block);
    // Use the array
    array.zeroOut();
    for (int i = 0; i < size; i++) {
      array.set(i, i * 2L);
    }
    // Process data
    processData(array);
  } catch (OutOfMemoryError e) {
    System.err.println("Failed to allocate array: " + e.getMessage());
  } finally {
    // block stays null when allocation itself failed, so there is nothing to free
    if (block != null) {
      allocator.free(block);
    }
  }
}
Consider maximum array size limits:
// Check against maximum safe array length
int requestedSize = calculateRequiredSize();
if (requestedSize > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {
  throw new IllegalArgumentException("Array size too large: " + requestedSize);
}
// Rounding is done only after the limit check has passed
int safeSize = ByteArrayMethods.roundNumberOfBytesToNearestWord(requestedSize);
Array operations integrate seamlessly with Platform methods:
import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.array.LongArray;
LongArray array = createLongArray(100);
// Direct memory access using Platform
Object baseObj = array.getBaseObject();
long baseOffset = array.getBaseOffset();
// Element 0 lives at baseOffset; element i lives at baseOffset + i * 8L
// Set value using Platform (equivalent to array.set(0, 42L))
Platform.putLong(baseObj, baseOffset, 42L);
// Get value using Platform (equivalent to array.get(0))
long value = Platform.getLong(baseObj, baseOffset);
LongArray works directly with MemoryBlock instances:
// Create from existing MemoryBlock
MemoryBlock existingBlock = allocateMemoryBlock(800);
LongArray array = new LongArray(existingBlock);
// Access underlying MemoryBlock
MemoryBlock block = array.memoryBlock();
long blockSize = block.size(); // size in bytes, not elements
int pageNumber = block.pageNumber;
Install with Tessl CLI
npx tessl i tessl/maven-org-apache-spark--spark-unsafe-2-12