or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

array-operations.md hashing-utilities.md index.md memory-management.md platform-operations.md utf8-string-operations.md

tile.json

Array Operations

Efficient array implementations and utility methods for byte arrays and long arrays with support for both heap and off-heap memory storage, providing optimized operations for Spark's internal data processing needs.

Capabilities

LongArray Operations

Memory-efficient array of long values that can be backed by either heap or off-heap memory, designed for high-performance numerical operations.

/**
 * Array of long values supporting on-heap and off-heap memory.
 * Storage is supplied by the MemoryBlock passed to the constructor.
 */
final class LongArray {
    /**
     * Create LongArray backed by a memory block
     * @param memory MemoryBlock providing storage (heap or off-heap)
     */
    public LongArray(MemoryBlock memory);
    
    /**
     * Get the underlying memory block
     * @return MemoryBlock backing this array (the one given to the constructor)
     */
    public MemoryBlock memoryBlock();
    
    /**
     * Get base object for memory access (null for off-heap)
     * @return Base object for on-heap storage, or null when storage is off-heap
     */
    public Object getBaseObject();
    
    /**
     * Get base offset within object or direct address
     * @return Offset within the base object, or the direct address when the
     *         base object is null (off-heap)
     */
    public long getBaseOffset();
    
    /**
     * Get number of long elements in array
     * @return Array size in elements (longs, not bytes)
     */
    public long size();
    
    /**
     * Fill entire array with zeros
     */
    public void zeroOut();
    
    /**
     * Set value at specified index
     * @param index Element index (counted in longs, not bytes)
     * @param value Long value to store
     */
    public void set(int index, long value);
    
    /**
     * Get value at specified index
     * @param index Element index (counted in longs, not bytes)
     * @return Long value at index
     */
    public long get(int index);
}

Usage Examples:

import org.apache.spark.unsafe.array.LongArray;
import org.apache.spark.unsafe.memory.*;

// Create LongArray with heap storage
MemoryAllocator allocator = MemoryAllocator.HEAP;
MemoryBlock block = allocator.allocate(800); // 100 longs * 8 bytes
LongArray array = new LongArray(block);

try {
    // Initialize array
    array.zeroOut();

    // Set values
    for (int i = 0; i < 10; i++) {
        array.set(i, i * 10L);
    }

    // Read values
    for (int i = 0; i < 10; i++) {
        long value = array.get(i);
        System.out.println("array[" + i + "] = " + value);
    }

    // Get array properties
    long size = array.size();
    Object baseObj = array.getBaseObject();

} finally {
    // Release the block even if the code above throws
    allocator.free(block);
}

// Create LongArray with off-heap storage
MemoryAllocator unsafeAllocator = MemoryAllocator.UNSAFE;
MemoryBlock offHeapBlock = unsafeAllocator.allocate(800);
try {
    LongArray offHeapArray = new LongArray(offHeapBlock);
    // ... use array ...
} finally {
    // Off-heap memory is never reclaimed by the garbage collector, so free
    // it in a finally block exactly like the heap example above.
    unsafeAllocator.free(offHeapBlock);
}

ByteArrayMethods Utilities

Comprehensive utility methods for byte array operations including comparisons, searching, and memory-related calculations.

/**
 * Utility methods for byte array operations with unsafe optimizations.
 * All members are static.
 */
class ByteArrayMethods {
    
    // Constants
    /**
     * Maximum safe array length for rounded arrays
     */
    public static final int MAX_ROUNDED_ARRAY_LENGTH;
    
    // Memory alignment utilities
    /**
     * Get next power of 2 greater than or equal to num
     * (for example, 1000 yields 1024)
     * @param num Input number
     * @return Next power of 2
     */
    public static long nextPowerOf2(long num);
    
    /**
     * Round number of bytes to nearest word boundary
     * NOTE(review): presumably rounds UP to the next multiple of the 8-byte
     * word size (e.g. 37 -> 40) - confirm against the implementation.
     * @param numBytes Number of bytes
     * @return Rounded byte count
     */
    public static int roundNumberOfBytesToNearestWord(int numBytes);
    
    /**
     * Round number of bytes to nearest word boundary (long version)
     * Same contract as the int overload, for byte counts that need a long.
     * @param numBytes Number of bytes
     * @return Rounded byte count
     */
    public static long roundNumberOfBytesToNearestWord(long numBytes);
    
    // Array comparison operations
    /**
     * Compare two byte arrays for equality using unsafe operations.
     * Only {@code length} bytes from each side are compared; the method is
     * not told the arrays' own lengths, so callers comparing whole arrays
     * must verify that the lengths match themselves.
     * @param leftBase Left array base object (null for off-heap)
     * @param leftOffset Left array offset or address
     * @param rightBase Right array base object (null for off-heap)
     * @param rightOffset Right array offset or address
     * @param length Number of bytes to compare
     * @return true if arrays are equal
     */
    public static boolean arrayEquals(Object leftBase, long leftOffset, Object rightBase, long rightOffset, long length);
    
    // Array search operations
    /**
     * Check if byte array contains sub-array
     * @param arr Main byte array
     * @param sub Sub-array to search for
     * @return true if sub-array is found
     */
    public static boolean contains(byte[] arr, byte[] sub);
    
    /**
     * Check if array starts with target sub-array
     * @param array Main byte array
     * @param target Target sub-array
     * @return true if array starts with target
     */
    public static boolean startsWith(byte[] array, byte[] target);
    
    /**
     * Check if array ends with target sub-array
     * @param array Main byte array
     * @param target Target sub-array
     * @return true if array ends with target
     */
    public static boolean endsWith(byte[] array, byte[] target);
    
    /**
     * Check if sub-array matches at specific position
     * @param arr Main byte array
     * @param sub Sub-array to match
     * @param pos Position in arr to check (0-based: "World" is at
     *            position 6 of "Hello World Program")
     * @return true if sub-array matches at position
     */
    public static boolean matchAt(byte[] arr, byte[] sub, int pos);
}

Usage Examples:

import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.array.ByteArrayMethods;

// Memory alignment calculations
long size = 1000;
long powerOf2 = ByteArrayMethods.nextPowerOf2(size); // Next power of 2 >= 1000, i.e. 1024
int alignedSize = ByteArrayMethods.roundNumberOfBytesToNearestWord(37); // Round to word boundary

// Array comparisons using unsafe operations
byte[] array1 = "Hello World".getBytes();
byte[] array2 = "Hello World".getBytes();

// Platform.BYTE_ARRAY_OFFSET is the offset of the first element of a byte[];
// both arrays have the same length here, so array1.length covers them fully.
boolean areEqual = ByteArrayMethods.arrayEquals(
    array1, Platform.BYTE_ARRAY_OFFSET,
    array2, Platform.BYTE_ARRAY_OFFSET,
    array1.length
);

// Array searching operations
byte[] mainArray = "Hello World Program".getBytes();
byte[] searchFor = "World".getBytes();

boolean contains = ByteArrayMethods.contains(mainArray, searchFor);
boolean starts = ByteArrayMethods.startsWith(mainArray, "Hello".getBytes());
boolean ends = ByteArrayMethods.endsWith(mainArray, "Program".getBytes());
boolean matches = ByteArrayMethods.matchAt(mainArray, "World".getBytes(), 6);

System.out.println("Contains 'World': " + contains);
System.out.println("Starts with 'Hello': " + starts);
System.out.println("Ends with 'Program': " + ends);
System.out.println("'World' at position 6: " + matches);

ByteArray Utilities

Specialized utilities for byte array operations including comparison, manipulation, and SQL-style string operations.

/**
 * Utility methods for byte array operations and comparisons.
 * All members are static.
 */
final class ByteArray {
    
    // Constants
    /**
     * Empty byte array constant
     */
    public static final byte[] EMPTY_BYTE;
    
    // Memory operations
    /**
     * Write byte array to memory location
     * @param src Source byte array
     * @param target Target object (null for off-heap)
     * @param targetOffset Target offset within the target object, or the
     *                     address when target is null
     */
    public static void writeToMemory(byte[] src, Object target, long targetOffset);
    
    /**
     * Get sorting prefix from byte array
     * @param bytes Byte array
     * @return Long prefix for sorting
     */
    public static long getPrefix(byte[] bytes);
    
    // Comparison operations
    /**
     * Compare two byte arrays lexicographically
     * @param leftBase Left byte array
     * @param rightBase Right byte array
     * @return Comparison result (negative, zero, positive); negative when
     *         the left array sorts first, e.g. "apple" vs "banana"
     */
    public static int compareBinary(byte[] leftBase, byte[] rightBase);
    
    // String-like operations
    /**
     * Extract substring from byte array (SQL-style, 1-based)
     * @param bytes Source byte array
     * @param pos Starting position (1-based, so 1 is the first byte)
     * @param len Length of substring in bytes
     * @return Substring as byte array
     */
    public static byte[] subStringSQL(byte[] bytes, int pos, int len);
    
    /**
     * Concatenate multiple byte arrays
     * @param inputs Byte arrays to concatenate, in order
     * @return Concatenated byte array
     */
    public static byte[] concat(byte[]... inputs);
    
    /**
     * Left pad byte array to specified length
     * NOTE(review): behavior when bytes is already longer than len is not
     * shown in this listing - confirm against the implementation.
     * @param bytes Source byte array
     * @param len Target length
     * @param pad Padding bytes, repeated as needed to reach len
     * @return Left-padded byte array
     */
    public static byte[] lpad(byte[] bytes, int len, byte[] pad);
    
    /**
     * Right pad byte array to specified length
     * NOTE(review): behavior when bytes is already longer than len is not
     * shown in this listing - confirm against the implementation.
     * @param bytes Source byte array
     * @param len Target length
     * @param pad Padding bytes, repeated as needed to reach len
     * @return Right-padded byte array
     */
    public static byte[] rpad(byte[] bytes, int len, byte[] pad);
}

Usage Examples:

import org.apache.spark.unsafe.types.ByteArray;
import org.apache.spark.unsafe.Platform;

// Memory operations: copy a byte[] into a heap buffer and derive its sort prefix
byte[] payload = "Hello".getBytes();
byte[] scratch = new byte[100];

ByteArray.writeToMemory(payload, scratch, Platform.BYTE_ARRAY_OFFSET);
long sortPrefix = ByteArray.getPrefix(payload);

// Lexicographic comparison
byte[] lhs = "apple".getBytes();
byte[] rhs = "banana".getBytes();
int order = ByteArray.compareBinary(lhs, rhs); // negative

// String-like operations on byte arrays
byte[] sentence = "Hello World".getBytes();

// SQL-style substring: positions start at 1, so 7 is the 'W'
byte[] world = ByteArray.subStringSQL(sentence, 7, 5); // "World"

// Concatenation of several arrays in one call
byte[] joined = ByteArray.concat(
    "Hello".getBytes(),
    " ".getBytes(),
    "World".getBytes());

// Padding to a fixed width of 10 bytes
byte[] shortText = "Hi".getBytes();
byte[] padByte = " ".getBytes();
byte[] padLeft = ByteArray.lpad(shortText, 10, padByte);   // "        Hi"
byte[] padRight = ByteArray.rpad(shortText, 10, padByte);  // "Hi        "

// The shared empty-array constant
byte[] none = ByteArray.EMPTY_BYTE;
System.out.println("Empty array length: " + none.length);

Performance Optimizations

Memory Alignment

ByteArrayMethods provides utilities for optimizing memory access through proper alignment:

// Raw byte count we want to store
final int rawSize = 1000;

// Power-of-2 capacity (>= rawSize) for better memory allocation
long optimalSize = ByteArrayMethods.nextPowerOf2(rawSize);

// Same byte count rounded to a word boundary
int wordAligned = ByteArrayMethods.roundNumberOfBytesToNearestWord(rawSize);

Unsafe Array Comparisons

For maximum performance, use unsafe array comparison methods:

import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.array.ByteArrayMethods;

// Fast array comparison using unsafe operations
byte[] array1 = getData1();
byte[] array2 = getData2();

// arrayEquals compares exactly `length` bytes and knows nothing about the
// arrays' real lengths. Comparing only the common prefix (the shorter length)
// would report "abc" and "abcdef" as equal, so reject differing lengths first.
boolean equal = array1.length == array2.length
    && ByteArrayMethods.arrayEquals(
        array1, Platform.BYTE_ARRAY_OFFSET,
        array2, Platform.BYTE_ARRAY_OFFSET,
        array1.length
    );

Memory-Efficient Array Creation

When working with LongArray, choose the memory allocation strategy based on how long the array will live:

// Byte sizes are computed with a long literal (8L): with an int elementCount,
// `elementCount * 8` would overflow in int arithmetic before being widened to
// the long size that allocate() receives.

// For temporary arrays, use heap allocation
MemoryAllocator heapAllocator = MemoryAllocator.HEAP;
MemoryBlock heapBlock = heapAllocator.allocate(elementCount * 8L);
LongArray heapArray = new LongArray(heapBlock);

// For long-lived arrays, consider off-heap allocation
MemoryAllocator offHeapAllocator = MemoryAllocator.UNSAFE;
MemoryBlock offHeapBlock = offHeapAllocator.allocate(elementCount * 8L);
LongArray offHeapArray = new LongArray(offHeapBlock);

Memory Management Best Practices

Resource Cleanup

Always ensure proper cleanup of allocated memory:

import org.apache.spark.unsafe.array.LongArray;
import org.apache.spark.unsafe.memory.*;

/**
 * Allocates a heap-backed LongArray of {@code size} elements, fills it with
 * {@code i * 2} at each index, hands it to {@code processData}, and always
 * frees the backing block afterwards.
 *
 * @param size Number of long elements to allocate
 */
public void processLongArray(int size) {
    MemoryAllocator allocator = MemoryAllocator.HEAP;
    MemoryBlock block = null;

    try {
        // Multiply as long: `size * 8` in int arithmetic overflows for
        // size > 268,435,455 and would request a wrong (even negative) size.
        block = allocator.allocate(size * 8L);
        LongArray array = new LongArray(block);

        // Use the array
        array.zeroOut();
        for (int i = 0; i < size; i++) {
            array.set(i, i * 2L);
        }

        // Process data
        processData(array);

    } catch (OutOfMemoryError e) {
        System.err.println("Failed to allocate array: " + e.getMessage());
    } finally {
        // block stays null if allocate() itself failed, so only free on success
        if (block != null) {
            allocator.free(block);
        }
    }
}

Array Size Considerations

Consider maximum array size limits:

// Reject requests above the maximum safe array length before rounding
final int requestedSize = calculateRequiredSize();
final boolean exceedsLimit = requestedSize > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH;
if (exceedsLimit) {
    throw new IllegalArgumentException("Array size too large: " + requestedSize);
}

// Within the limit: round to a word boundary
int safeSize = ByteArrayMethods.roundNumberOfBytesToNearestWord(requestedSize);

Integration with Other Components

Working with Platform Operations

Array operations integrate seamlessly with Platform methods:

import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.array.LongArray;

LongArray longs = createLongArray(100);

// Location of element 0, as understood by Platform
Object base = longs.getBaseObject();
long offset = longs.getBaseOffset();

// Write through Platform (equivalent to longs.set(0, 42L)) ...
Platform.putLong(base, offset, 42L);

// ... and read it back (equivalent to longs.get(0))
long firstElement = Platform.getLong(base, offset);

Memory Block Integration

LongArray works directly with MemoryBlock instances:

// Wrap an already-allocated MemoryBlock
MemoryBlock backing = allocateMemoryBlock(800);
LongArray wrapped = new LongArray(backing);

// Reach back to the MemoryBlock through the array and inspect it
MemoryBlock underlying = wrapped.memoryBlock();
int pageNumber = underlying.pageNumber;
long blockSize = underlying.size();

Version

Tile

Files