The Platform class provides the foundation for all unsafe memory operations in Spark, wrapping Java's sun.misc.Unsafe for high-performance direct memory access. This class is critical for bypassing Java's safety mechanisms to achieve maximum performance in data processing operations.
import org.apache.spark.unsafe.Platform;
// Allocate 1024 bytes of off-heap memory
long blockAddress = Platform.allocateMemory(1024);

// Store a long at the start of the block and an int in the 4 bytes right after it.
// Passing null as the base object means the offset is an off-heap address.
final long intFieldAddress = blockAddress + 8;
Platform.putLong(null, blockAddress, 42L);
Platform.putInt(null, intFieldAddress, 100);

// Load both values back from the same locations.
long longValue = Platform.getLong(null, blockAddress);
int intValue = Platform.getInt(null, intFieldAddress);

// Release the block once it is no longer needed.
Platform.freeMemory(blockAddress);

long[] values = new long[10];
long firstElementOffset = Platform.LONG_ARRAY_OFFSET;

// Fill every slot of the on-heap array through unsafe writes.
for (int index = 0; index < values.length; index++) {
    long elementOffset = firstElementOffset + (index * 8);
    Platform.putLong(values, elementOffset, index * 10);
}

// Walk the array again, loading each slot through unsafe reads.
for (int index = 0; index < values.length; index++) {
    long elementOffset = firstElementOffset + (index * 8);
    long loaded = Platform.getLong(values, elementOffset);
    System.out.println("Index " + index + ": " + loaded);
}
// Create source and destination buffers
// Two independent 1024-byte off-heap blocks: one to copy from, one to copy into.
long srcAddress = Platform.allocateMemory(1024);
long dstAddress = Platform.allocateMemory(1024);
// Fill source with data: every one of the 1024 bytes is set to 0xAB
// (argument order for the off-heap overload is address, value, size).
Platform.setMemory(srcAddress, (byte) 0xAB, 1024);
// Copy memory: null base objects mark both offsets as off-heap addresses.
Platform.copyMemory(null, srcAddress, null, dstAddress, 1024);
// Cleanup: each allocation needs its own freeMemory() call; dstAddress is
// released by the statement that immediately follows this snippet.
Platform.freeMemory(srcAddress);
Platform.freeMemory(dstAddress);
/**
* Returns true when the running JVM supports unaligned memory access.
*/
public static boolean unaligned()
public static final int BOOLEAN_ARRAY_OFFSET
public static final int BYTE_ARRAY_OFFSET
public static final int SHORT_ARRAY_OFFSET
public static final int INT_ARRAY_OFFSET
public static final int LONG_ARRAY_OFFSET
public static final int FLOAT_ARRAY_OFFSET
public static final int DOUBLE_ARRAY_OFFSET
/**
* Allocates off-heap memory and returns the address.
*/
public static long allocateMemory(long size)
/**
* Frees previously allocated off-heap memory.
*/
public static void freeMemory(long address)
/**
* Reallocates memory block to new size, copying existing data.
*/
public static long reallocateMemory(long address, long oldSize, long newSize)
/**
* Allocates DirectByteBuffer bypassing JVM MaxDirectMemorySize limit.
*/
public static java.nio.ByteBuffer allocateDirectBuffer(int size)
public static int getInt(Object object, long offset)
public static void putInt(Object object, long offset, int value)
public static boolean getBoolean(Object object, long offset)
public static void putBoolean(Object object, long offset, boolean value)
public static byte getByte(Object object, long offset)
public static void putByte(Object object, long offset, byte value)
public static short getShort(Object object, long offset)
public static void putShort(Object object, long offset, short value)
public static long getLong(Object object, long offset)
public static void putLong(Object object, long offset, long value)
public static float getFloat(Object object, long offset)
public static void putFloat(Object object, long offset, float value)
public static double getDouble(Object object, long offset)
public static void putDouble(Object object, long offset, double value)
/**
* Reads object reference with volatile semantics.
*/
public static Object getObjectVolatile(Object object, long offset)
/**
* Writes object reference with volatile semantics.
*/
public static void putObjectVolatile(Object object, long offset, Object value)
/**
* Fills memory with specified byte value.
*/
public static void setMemory(Object object, long offset, long size, byte value)
/**
* Fills off-heap memory with specified byte value.
*/
public static void setMemory(long address, byte value, long size)
/**
* Copies memory between locations, handling overlapping regions.
*/
public static void copyMemory(Object src, long srcOffset, Object dst, long dstOffset, long length)
/**
* Throws exception bypassing compiler checks for checked exceptions.
*/
public static void throwException(Throwable t)
Memory Management: Always pair allocateMemory() calls with freeMemory() to prevent memory leaks.
Object References: When accessing on-heap data such as object fields or array elements, pass the object as the first parameter and add the appropriate base offset (for arrays, the matching *_ARRAY_OFFSET constant).
Off-heap Access: When accessing off-heap memory, pass null as the object parameter.
Alignment: Use unaligned() to check if the platform supports unaligned memory access before performing unaligned operations.
Thread Safety: These operations are not inherently thread-safe. Proper synchronization must be implemented at the application level.
Performance: These methods bypass Java's bounds checking and type safety for maximum performance. Use with extreme caution.
The UnsafeAlignedOffset class provides utilities for working with aligned memory offsets, particularly useful for reading and writing size information in memory-efficient data structures.
/**
 * Utilities for reading and writing size values stored at aligned offsets,
 * e.g. the size header of a memory-efficient data structure.
 */
public class UnsafeAlignedOffset {
/**
 * Returns the aligned offset size in bytes (4 or 8 bytes depending on platform).
 * This is the number of bytes one stored size value occupies, so callers must
 * advance by this amount to step past a size header.
 *
 * @return Aligned offset size in bytes, either 4 or 8
 */
public static int getUaoSize();
/**
 * Reads a size value from an aligned offset location.
 *
 * @param object Base object containing the aligned offset
 * @param offset Offset within the base object
 * @return Size value stored at the aligned offset
 */
public static int getSize(Object object, long offset);
/**
 * Writes a size value to an aligned offset location.
 *
 * @param object Base object containing the aligned offset
 * @param offset Offset within the base object
 * @param value Size value to store (an int, so limited to the 32-bit range)
 */
public static void putSize(Object object, long offset, int value);
}
// Working with aligned offsets
// The header occupies one aligned-offset slot whose width is platform-specific.
int headerBytes = UnsafeAlignedOffset.getUaoSize();

// Reserve room for the header followed by a 1024-byte payload.
MemoryAllocator heap = MemoryAllocator.HEAP;
MemoryBlock region = heap.allocate(headerBytes + 1024);
try {
    Object base = region.getBaseObject();
    long headerOffset = region.getBaseOffset();
    int payloadBytes = 1024;

    // Record the payload length in the header, then confirm it reads back unchanged.
    UnsafeAlignedOffset.putSize(base, headerOffset, payloadBytes);
    int recorded = UnsafeAlignedOffset.getSize(base, headerOffset);
    assert recorded == payloadBytes;

    // The payload begins immediately after the header; fill it with 0xFF bytes.
    long payloadOffset = headerOffset + headerBytes;
    Platform.setMemory(base, payloadOffset, payloadBytes, (byte) 0xFF);
} finally {
    // Hand the block back to the allocator even if something above throws.
    heap.free(region);
}
// Example: Variable-length data structure with size header
/**
 * Variable-length byte buffer whose payload is preceded by an aligned-offset size header.
 *
 * <p>Layout inside the backing {@code MemoryBlock}: a header of
 * {@code UnsafeAlignedOffset.getUaoSize()} bytes holding the payload capacity, followed by
 * the payload itself. An instance owns its block and must be closed; it implements
 * {@link AutoCloseable} so it can be managed with try-with-resources.
 *
 * <p>This class performs no synchronization and is not safe for concurrent use.
 */
public class VariableLengthData implements AutoCloseable {
    private final MemoryBlock memory;
    private final Object baseObject;
    private final long baseOffset;
    private final int headerSize;
    // True once the block has been handed back to the allocator. Guards against freeing
    // the same block twice and against writing into memory this instance no longer owns.
    private boolean closed;

    /**
     * Allocates a block large enough for the size header plus {@code dataSize} payload
     * bytes and records {@code dataSize} in the header.
     *
     * @param dataSize payload capacity in bytes; must be non-negative and small enough
     *                 that header plus payload still fits in an {@code int}
     * @throws IllegalArgumentException if {@code dataSize} is out of range
     */
    public VariableLengthData(int dataSize) {
        this.headerSize = UnsafeAlignedOffset.getUaoSize();
        // Reject sizes that would make headerSize + dataSize negative or overflow int.
        if (dataSize < 0 || dataSize > Integer.MAX_VALUE - headerSize) {
            throw new IllegalArgumentException("Invalid dataSize: " + dataSize);
        }
        this.memory = MemoryAllocator.HEAP.allocate(headerSize + dataSize);
        this.baseObject = memory.getBaseObject();
        this.baseOffset = memory.getBaseOffset();
        // Store data size in aligned offset header
        UnsafeAlignedOffset.putSize(baseObject, baseOffset, dataSize);
    }

    /**
     * Returns the payload capacity by reading it back from the size header.
     *
     * @return payload capacity in bytes, as stored by the constructor
     */
    public int getDataSize() {
        return UnsafeAlignedOffset.getSize(baseObject, baseOffset);
    }

    /**
     * Returns the offset, relative to the base object, of the first payload byte.
     *
     * @return base offset of the block plus the header size
     */
    public long getDataOffset() {
        return baseOffset + headerSize;
    }

    /**
     * Copies {@code data} into the payload area, starting at the first payload byte.
     * If {@code data} is longer than the payload capacity, only the leading
     * {@code getDataSize()} bytes are copied; the rest is silently dropped.
     *
     * @param data bytes to copy; must not be null
     * @throws NullPointerException if {@code data} is null
     * @throws IllegalStateException if this instance has already been closed
     */
    public void writeData(byte[] data) {
        if (closed) {
            throw new IllegalStateException("VariableLengthData is closed");
        }
        int maxSize = getDataSize();
        // Clamp to the capacity so the copy can never run past the end of the block.
        int writeSize = Math.min(data.length, maxSize);
        Platform.copyMemory(
            data, Platform.BYTE_ARRAY_OFFSET,
            baseObject, getDataOffset(),
            writeSize
        );
    }

    /**
     * Returns the backing block to the heap allocator. Calling this more than once is a
     * no-op, so the block is never freed twice.
     */
    @Override
    public void close() {
        if (closed) {
            return;
        }
        closed = true;
        MemoryAllocator.HEAP.free(memory);
    }
}
Platform Dependent: The aligned offset size varies by platform (typically 4 or 8 bytes).
Alignment Requirements: Aligned offsets must be properly aligned in memory for optimal performance.
Size Limitations: Size values are stored as 32-bit integers, limiting the maximum representable size.
Memory Layout: Aligned offsets are typically used as headers in variable-length data structures.
Performance: Direct memory access provides maximum performance for size operations.