0
# Array Operations
1
2
Spark Unsafe provides high-performance utilities for array operations, including optimized byte array methods, memory-backed long arrays, and key-value iterators. These utilities are designed for maximum performance in data processing workloads by leveraging unsafe memory operations and word-aligned access patterns.
3
4
## Core Imports
5
6
```java
7
import java.io.IOException;
8
import org.apache.spark.unsafe.array.ByteArrayMethods;
9
import org.apache.spark.unsafe.array.LongArray;
10
import org.apache.spark.unsafe.memory.MemoryBlock;
11
import org.apache.spark.unsafe.KVIterator;
12
import org.apache.spark.unsafe.types.ByteArray;
13
```
14
15
## Usage Examples
16
17
### Byte Array Utilities
18
19
```java
20
// Power of 2 calculations
21
long nextPower = ByteArrayMethods.nextPowerOf2(100); // Returns 128
22
long powerOf16 = ByteArrayMethods.nextPowerOf2(16); // Returns 16
23
24
// Word alignment calculations
25
int aligned1 = ByteArrayMethods.roundNumberOfBytesToNearestWord(15); // Returns 16
26
int aligned2 = ByteArrayMethods.roundNumberOfBytesToNearestWord(24); // Returns 24
27
28
// High-performance array comparison
29
byte[] array1 = "Hello, World!".getBytes(StandardCharsets.UTF_8);
30
byte[] array2 = "Hello, World!".getBytes(StandardCharsets.UTF_8);
31
byte[] array3 = "Different".getBytes(StandardCharsets.UTF_8);
32
33
boolean equal1 = ByteArrayMethods.arrayEquals(
34
array1, Platform.BYTE_ARRAY_OFFSET,
35
array2, Platform.BYTE_ARRAY_OFFSET,
36
array1.length
37
); // true
38
39
boolean equal2 = ByteArrayMethods.arrayEquals(
40
array1, Platform.BYTE_ARRAY_OFFSET,
41
array3, Platform.BYTE_ARRAY_OFFSET,
42
array1.length
43
); // false
44
```
45
46
### Memory-Backed Long Arrays
47
48
```java
49
// Create memory block for long array
50
HeapMemoryAllocator allocator = new HeapMemoryAllocator();
51
MemoryBlock memory = allocator.allocate(80); // 10 longs * 8 bytes each
52
53
// Create long array backed by memory block
54
LongArray longArray = new LongArray(memory);
55
56
// Basic operations
57
long capacity = longArray.size(); // Number of longs this array can hold
58
System.out.println("Array capacity: " + capacity);
59
60
// Fill array with data
61
for (int i = 0; i < capacity; i++) {
62
longArray.set(i, i * 10L);
63
}
64
65
// Read data from array
66
for (int i = 0; i < capacity; i++) {
67
long value = longArray.get(i);
68
System.out.println("Index " + i + ": " + value);
69
}
70
71
// Zero out the entire array
72
longArray.zeroOut();
73
74
// Verify array is zeroed
75
for (int i = 0; i < capacity; i++) {
76
long value = longArray.get(i);
77
assert value == 0L;
78
}
79
80
// Clean up
81
allocator.free(memory);
82
```
83
84
### Advanced Array Operations
85
86
```java
87
// Working with memory-backed arrays and direct access
88
MemoryBlock block = allocator.allocate(1024);
89
LongArray array = new LongArray(block);
90
91
// Get direct memory access information
92
Object baseObject = array.getBaseObject();
93
long baseOffset = array.getBaseOffset();
94
MemoryBlock underlyingBlock = array.memoryBlock();
95
96
// Use Platform class for direct memory access
97
Platform.putLong(baseObject, baseOffset, 12345L);
98
long directValue = Platform.getLong(baseObject, baseOffset);
99
100
// Compare with array methods
101
array.set(0, 12345L);
102
long arrayValue = array.get(0);
103
104
assert directValue == arrayValue; // Both approaches yield same result
105
```
106
107
## API Reference
108
109
### ByteArrayMethods Class
110
111
```java { .api }
112
public class ByteArrayMethods {
113
/**
114
* Maximum safe array length for word-aligned arrays.
115
*/
116
public static final int MAX_ROUNDED_ARRAY_LENGTH;
117
118
/**
119
* Returns the next power of 2 greater than or equal to the input.
120
* For inputs already a power of 2, returns the input unchanged.
121
*/
122
public static long nextPowerOf2(long num);
123
124
/**
125
* Rounds byte count up to the nearest 8-byte (word) boundary.
126
*/
127
public static int roundNumberOfBytesToNearestWord(int numBytes);
128
129
/**
130
* Rounds byte count up to the nearest 8-byte (word) boundary.
131
*/
132
public static long roundNumberOfBytesToNearestWord(long numBytes);
133
134
/**
135
* High-performance byte array equality comparison using unsafe operations.
136
* Compares arrays in word-sized chunks for maximum performance.
137
*
138
* @param leftBase Base object for left array (array itself for heap arrays)
139
* @param leftOffset Offset within left base object
140
* @param rightBase Base object for right array
141
* @param rightOffset Offset within right base object
142
* @param length Number of bytes to compare
143
* @return true if arrays are equal, false otherwise
144
*/
145
public static boolean arrayEquals(Object leftBase, long leftOffset,
146
Object rightBase, long rightOffset, long length);
147
}
148
```
149
150
### LongArray Class
151
152
```java { .api }
153
public final class LongArray {
154
/**
155
* Creates a long array backed by the specified memory block.
156
* The memory block must be at least 8-byte aligned and have sufficient space.
157
*/
158
public LongArray(MemoryBlock memory);
159
160
/**
161
* Returns the underlying memory block backing this array.
162
*/
163
public MemoryBlock memoryBlock();
164
165
/**
166
* Returns the base object for direct memory access.
167
* For on-heap memory blocks, this is the backing array (a long[] when allocated by HeapMemoryAllocator).
168
* For off-heap arrays, this is null.
169
*/
170
public Object getBaseObject();
171
172
/**
173
* Returns the base offset for direct memory access.
174
*/
175
public long getBaseOffset();
176
177
/**
178
* Returns the number of long elements this array can hold.
179
* This is the memory block size divided by 8.
180
*/
181
public long size();
182
183
/**
184
* Fills the entire array with zeros using optimized memory operations.
185
*/
186
public void zeroOut();
187
188
/**
189
* Sets the value at the specified index.
190
*
191
* @param index Array index (0-based)
192
* @param value Long value to store
193
*/
194
public void set(int index, long value);
195
196
/**
197
* Gets the value at the specified index.
198
*
199
* @param index Array index (0-based)
200
* @return Long value at the specified index
201
*/
202
public long get(int index);
203
}
204
```
205
206
## Performance Characteristics
207
208
### ByteArrayMethods Performance
209
210
1. **Word-Aligned Comparison**: The `arrayEquals` method compares arrays in 8-byte chunks when possible, which is significantly faster than byte-by-byte comparison.
211
212
2. **SIMD Optimization**: On supported platforms, the JVM may use SIMD instructions for bulk operations.
213
214
3. **Cache Efficiency**: Word-aligned access patterns improve CPU cache utilization.
215
216
### LongArray Performance
217
218
1. **Direct Memory Access**: Bypasses array bounds checking for maximum performance.
219
220
2. **Memory Layout**: Uses contiguous memory layout for optimal cache performance.
221
222
3. **Bulk Operations**: The `zeroOut()` method uses optimized memory filling operations.
223
224
## Memory Management
225
226
### ByteArrayMethods
227
228
- No direct memory management required
229
- Works with existing arrays and memory regions
230
- Comparison operations don't allocate additional memory
231
232
### LongArray
233
234
- Backed by `MemoryBlock` which must be explicitly managed
235
- Does not own the underlying memory block
236
- Memory block must remain valid for the lifetime of the LongArray
237
- Caller responsible for freeing the underlying memory block
238
239
## Usage Notes
240
241
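1. **Bounds Checking**: For performance reasons, LongArray does not perform bounds checking in normal operation; its index checks are Java `assert` statements, which only run when the JVM is started with assertions enabled (`-ea`). Ensure every index is within `0` to `size() - 1`, because an out-of-range access can corrupt memory or crash the JVM.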
242
243
2. **Memory Alignment**: LongArray requires 8-byte aligned memory blocks for correct operation.
244
245
3. **Thread Safety**: Neither ByteArrayMethods nor LongArray provides thread safety guarantees.
246
247
4. **Memory Block Lifetime**: Ensure the MemoryBlock backing a LongArray remains valid during array usage.
248
249
5. **Platform Dependencies**: Performance characteristics may vary across different JVM implementations and platforms.
250
251
## Common Patterns
252
253
### Safe Array Creation and Usage
254
255
```java
256
// Calculate required size with proper alignment
257
int numElements = 1000;
258
long requiredBytes = numElements * 8L; // 8 bytes per long
259
long alignedBytes = ByteArrayMethods.roundNumberOfBytesToNearestWord(requiredBytes);
260
261
// Allocate aligned memory
262
MemoryAllocator allocator = MemoryAllocator.HEAP;
263
MemoryBlock block = allocator.allocate(alignedBytes);
264
265
try {
266
LongArray array = new LongArray(block);
267
268
// Use array safely within calculated bounds
269
long actualCapacity = array.size();
270
for (int i = 0; i < Math.min(numElements, actualCapacity); i++) {
271
array.set(i, i);
272
}
273
274
// Process data...
275
} finally {
276
// Always clean up
277
allocator.free(block);
278
}
279
```
280
281
### Efficient Array Comparison
282
283
```java
284
// Compare arrays efficiently using word-aligned operations
285
public static boolean fastArrayEquals(byte[] a, byte[] b) {
286
if (a.length != b.length) {
287
return false;
288
}
289
290
return ByteArrayMethods.arrayEquals(
291
a, Platform.BYTE_ARRAY_OFFSET,
292
b, Platform.BYTE_ARRAY_OFFSET,
293
a.length
294
);
295
}
296
```
297
298
### Power-of-2 Buffer Sizing
299
300
```java
301
// Calculate optimal buffer size
302
int desiredSize = 1000;
303
long optimalSize = ByteArrayMethods.nextPowerOf2(desiredSize);
304
MemoryBlock buffer = allocator.allocate(optimalSize);
305
```
306
307
## Additional Array Utilities
308
309
### KVIterator Abstract Class
310
311
```java { .api }
312
/**
313
* Abstract base class for key-value iterators.
314
* Provides a common interface for iterating over key-value pairs.
315
*/
316
public abstract class KVIterator<K, V> {
317
/**
318
* Advances to the next key-value pair.
319
* @return true if there is a next pair, false if iteration is complete
320
* @throws IOException if an I/O error occurs during iteration
321
*/
322
public abstract boolean next() throws IOException;
323
324
/**
325
* Returns the current key.
326
* Must be called after a successful next() call.
327
* @return the current key
328
*/
329
public abstract K getKey();
330
331
/**
332
* Returns the current value.
333
* Must be called after a successful next() call.
334
* @return the current value
335
*/
336
public abstract V getValue();
337
338
/**
339
* Closes the iterator and releases any associated resources.
340
*/
341
public abstract void close();
342
}
343
```
344
345
### ByteArray Utilities
346
347
```java { .api }
348
public final class ByteArray {
349
/**
350
* Empty byte array constant.
351
*/
352
public static final byte[] EMPTY_BYTE;
353
354
/**
355
* Writes byte array content to specified memory location.
356
*
357
* @param src Source byte array
358
* @param target Target base object
359
* @param targetOffset Offset within target object
360
*/
361
public static void writeToMemory(byte[] src, Object target, long targetOffset);
362
363
/**
364
* Returns 64-bit prefix of byte array for sorting operations.
365
*
366
* @param bytes Input byte array
367
* @return 64-bit prefix value
368
*/
369
public static long getPrefix(byte[] bytes);
370
371
/**
372
* Extracts substring from byte array using SQL semantics.
373
*
374
* @param bytes Source byte array
375
* @param pos Starting position (1-based, SQL-style)
376
* @param len Length of substring
377
* @return Extracted byte array substring
378
*/
379
public static byte[] subStringSQL(byte[] bytes, int pos, int len);
380
381
/**
382
* Concatenates multiple byte arrays into a single array.
383
*
384
* @param inputs Variable number of byte arrays to concatenate
385
* @return Concatenated byte array
386
*/
387
public static byte[] concat(byte[]... inputs);
388
}
389
```
390
391
### Usage Examples for Additional Utilities
392
393
#### KVIterator Usage Pattern
394
395
```java
396
// Example implementation of KVIterator
397
public class SimpleKVIterator extends KVIterator<String, Integer> {
398
private final Map<String, Integer> data;
399
private final Iterator<Map.Entry<String, Integer>> iterator;
400
private Map.Entry<String, Integer> current;
401
402
public SimpleKVIterator(Map<String, Integer> data) {
403
this.data = data;
404
this.iterator = data.entrySet().iterator();
405
this.current = null;
406
}
407
408
@Override
409
public boolean next() {
410
if (iterator.hasNext()) {
411
current = iterator.next();
412
return true;
413
}
414
return false;
415
}
416
417
@Override
418
public String getKey() {
419
return current != null ? current.getKey() : null;
420
}
421
422
@Override
423
public Integer getValue() {
424
return current != null ? current.getValue() : null;
425
}
426
427
@Override
428
public void close() {
429
// Clean up resources if needed
430
current = null;
431
}
432
}
433
434
// Usage
435
Map<String, Integer> data = Map.of("a", 1, "b", 2, "c", 3);
436
KVIterator<String, Integer> iterator = new SimpleKVIterator(data);
437
438
while (iterator.next()) {
439
String key = iterator.getKey();
440
Integer value = iterator.getValue();
441
System.out.println(key + " -> " + value);
442
}
443
iterator.close();
444
```
445
446
#### ByteArray Operations
447
448
```java
449
// Working with ByteArray utilities
450
byte[] data1 = "Hello".getBytes(StandardCharsets.UTF_8);
451
byte[] data2 = "World".getBytes(StandardCharsets.UTF_8);
452
453
// Concatenate arrays
454
byte[] concatenated = ByteArray.concat(data1, " ".getBytes(), data2);
455
String result = new String(concatenated, StandardCharsets.UTF_8); // "Hello World"
456
457
// Get prefix for sorting
458
long prefix1 = ByteArray.getPrefix(data1);
459
long prefix2 = ByteArray.getPrefix(data2);
460
int comparison = Long.compare(prefix1, prefix2);
461
462
// SQL-style substring
463
byte[] fullText = "Hello, World!".getBytes(StandardCharsets.UTF_8);
464
byte[] substring = ByteArray.subStringSQL(fullText, 8, 5); // "World" (1-based, length 5)
465
466
// Write to memory
467
MemoryAllocator allocator = MemoryAllocator.HEAP;
468
MemoryBlock block = allocator.allocate(concatenated.length);
469
try {
470
ByteArray.writeToMemory(concatenated, block.getBaseObject(), block.getBaseOffset());
471
// Data is now written to memory block
472
} finally {
473
allocator.free(block);
474
}
475
```