Optimized byte and long array operations supporting both on-heap and off-heap memory without bounds checking for maximum performance. These operations are designed for high-throughput data processing in distributed computing environments.
Utility methods for optimized byte array operations, comparisons, and memory-efficient manipulations.
public class ByteArrayMethods {
// Smallest power of two that is >= num (e.g. nextPowerOf2(1000) == 1024).
public static long nextPowerOf2(long num);
// Rounds a byte count up to the nearest word boundary for aligned access
// (presumably an 8-byte word -- TODO confirm against the implementation).
public static int roundNumberOfBytesToNearestWord(int numBytes);
public static long roundNumberOfBytesToNearestWord(long numBytes);
// Byte-wise equality of two memory regions. Pass null as a base with an
// absolute address in the corresponding offset to compare off-heap memory.
public static boolean arrayEquals(Object leftBase, long leftOffset, Object rightBase, long rightOffset, long length);
// Substring-style searches over byte arrays: containment anywhere, prefix,
// suffix, and a match of sub against arr starting at index pos.
public static boolean contains(byte[] arr, byte[] sub);
public static boolean startsWith(byte[] array, byte[] target);
public static boolean endsWith(byte[] array, byte[] target);
public static boolean matchAt(byte[] arr, byte[] sub, int pos);
// Maximum array length that is safe to round up and allocate.
public static final int MAX_ROUNDED_ARRAY_LENGTH;
}

High-performance array of long values supporting both on-heap and off-heap memory without bounds checking.
public final class LongArray {
// Wraps an already-allocated MemoryBlock; works for on-heap and off-heap blocks.
public LongArray(MemoryBlock memory);
// Underlying memory block this array is a view over.
public MemoryBlock memoryBlock();
// Base object (null for off-heap) and base offset for raw memory access.
public Object getBaseObject();
public long getBaseOffset();
// Number of long elements the array holds.
public long size();
// Sets every element to zero.
public void zeroOut();
// Element access; no bounds checking is performed -- callers must stay in range.
public void set(int index, long value);
public long get(int index);
}

Abstract iterator interface for key-value pairs with I/O exception handling capabilities.
public abstract class KVIterator<K, V> {
// Advances to the next pair; returns true while a pair is available.
// Declared to throw IOException so implementations may read from disk or streams.
public abstract boolean next() throws IOException;
// Key and value of the current pair; only valid after next() has returned true.
public abstract K getKey();
public abstract V getValue();
// Releases any resources held by the iterator; call when finished.
public abstract void close();
}

import org.apache.spark.unsafe.array.ByteArrayMethods;
// Power of 2 calculations for memory alignment
long size = ByteArrayMethods.nextPowerOf2(1000); // Returns 1024
int rounded = ByteArrayMethods.roundNumberOfBytesToNearestWord(15); // Aligns to word boundary
// Array comparison and search operations
// NOTE(review): getBytes() uses the platform default charset; examples that
// care about encoding should pass StandardCharsets.UTF_8 explicitly.
byte[] data = "Hello World".getBytes();
byte[] pattern = "World".getBytes();
boolean hasPattern = ByteArrayMethods.contains(data, pattern);
boolean startsWithHello = ByteArrayMethods.startsWith(data, "Hello".getBytes());
boolean endsWithWorld = ByteArrayMethods.endsWith(data, "World".getBytes());
boolean matchesAtPosition = ByteArrayMethods.matchAt(data, pattern, 6); // "World" starts at index 6
// High-performance array equality check
byte[] array1 = "test data".getBytes();
byte[] array2 = "test data".getBytes();
// NOTE(review): for on-heap byte[] bases, Spark code normally passes
// Platform.BYTE_ARRAY_OFFSET (not 0) as the offset -- verify these offsets.
boolean equal = ByteArrayMethods.arrayEquals(
array1, 0, // Left base and offset
array2, 0, // Right base and offset
array1.length // Length to compare
);

import org.apache.spark.unsafe.array.LongArray;
import org.apache.spark.unsafe.memory.MemoryAllocator;
import org.apache.spark.unsafe.memory.MemoryBlock;
// Create long array from allocated memory
MemoryAllocator allocator = MemoryAllocator.UNSAFE; // off-heap allocator
MemoryBlock memory = allocator.allocate(8 * 1000); // 1000 longs (8 bytes each)
LongArray array = new LongArray(memory);
// Array operations
long arraySize = array.size(); // Number of elements
array.zeroOut(); // Fill with zeros
// Set and get values (no bounds checking for performance)
for (int i = 0; i < arraySize; i++) {
array.set(i, i * 2L);
}
long value = array.get(500); // Get value at index 500
// Access underlying memory
Object baseObject = array.getBaseObject(); // null here, since the block is off-heap
long baseOffset = array.getBaseOffset();
MemoryBlock underlyingMemory = array.memoryBlock();
// Clean up
// NOTE(review): real code should free the block in a finally clause so the
// off-heap memory is released even when an exception occurs above.
allocator.free(memory);

import org.apache.spark.unsafe.array.ByteArrayMethods;
import org.apache.spark.unsafe.Platform;
// Create two off-heap memory regions; the caller owns and must free them
long addr1 = Platform.allocateMemory(1000);
long addr2 = Platform.allocateMemory(1000);
// Fill with test data
Platform.setMemory(addr1, (byte) 0x42, 1000);
Platform.setMemory(addr2, (byte) 0x42, 1000);
// High-performance comparison: null base means the offset is an absolute address
boolean equal = ByteArrayMethods.arrayEquals(
null, addr1, // Left base (null for off-heap) and offset
null, addr2, // Right base and offset
1000 // Length
);
// Clean up
// NOTE(review): production code should free these in a finally block.
Platform.freeMemory(addr1);
Platform.freeMemory(addr2);

import org.apache.spark.unsafe.KVIterator;
import java.io.IOException;
import java.util.List;
import java.util.Map;
// Example implementation for iterating over key-value pairs
public class ListKVIterator extends KVIterator<String, Integer> {
    /** Backing list of key-value entries to walk over. */
    private final List<Map.Entry<String, Integer>> entries;
    /** Index of the current entry; -1 until next() is first called. */
    private int cursor = -1;

    public ListKVIterator(List<Map.Entry<String, Integer>> entries) {
        this.entries = entries;
    }

    @Override
    public boolean next() throws IOException {
        // Advance the cursor; report whether it still points inside the list.
        cursor += 1;
        return cursor < entries.size();
    }

    @Override
    public String getKey() {
        // Only valid after next() has returned true.
        return entries.get(cursor).getKey();
    }

    @Override
    public Integer getValue() {
        return entries.get(cursor).getValue();
    }

    @Override
    public void close() {
        // Nothing to release for an in-memory list.
    }
}
// Usage
KVIterator<String, Integer> iterator = new ListKVIterator(keyValuePairs);
try {
// next() may throw IOException; the enclosing method must declare or handle it.
while (iterator.next()) {
String key = iterator.getKey();
Integer value = iterator.getValue();
// Process key-value pair
}
} finally {
iterator.close(); // always release resources, even if iteration fails
}

import org.apache.spark.unsafe.array.ByteArrayMethods;
// Ensure memory allocations are word-aligned for optimal performance
int requestedSize = 123;
int alignedSize = ByteArrayMethods.roundNumberOfBytesToNearestWord(requestedSize);
// Use aligned size for memory allocation
// NOTE(review): Platform is used here but not imported in this snippet, and the
// allocation is never released -- pair with Platform.freeMemory(address).
long address = Platform.allocateMemory(alignedSize);
// Check maximum safe array length
int maxLength = ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH;
if (requestedSize <= maxLength) {
// Safe to allocate
}