or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

arrays.md · hashing-bitsets.md · index.md · intervals.md · memory.md · platform.md · utf8-strings.md

docs/arrays.md

0

# Array Operations

1

2

Spark Unsafe provides high-performance utilities for array operations, including optimized byte array methods, memory-backed long arrays, and key-value iterators. These utilities are designed for maximum performance in data processing workloads by leveraging unsafe memory operations and word-aligned access patterns.

3

4

## Core Imports

5

6

```java

7

import java.io.IOException;

8

import org.apache.spark.unsafe.array.ByteArrayMethods;

9

import org.apache.spark.unsafe.array.LongArray;

10

import org.apache.spark.unsafe.memory.MemoryBlock;

11

import org.apache.spark.unsafe.KVIterator;

12

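import org.apache.spark.unsafe.types.ByteArray;

// Also required by the usage examples below
import java.nio.charset.StandardCharsets;
import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.memory.HeapMemoryAllocator;
import org.apache.spark.unsafe.memory.MemoryAllocator;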

13

```

14

15

## Usage Examples

16

17

### Byte Array Utilities

18

19

```java

20

// Power of 2 calculations

21

long nextPower = ByteArrayMethods.nextPowerOf2(100); // Returns 128

22

long powerOf16 = ByteArrayMethods.nextPowerOf2(16); // Returns 16

23

24

// Word alignment calculations

25

int aligned1 = ByteArrayMethods.roundNumberOfBytesToNearestWord(15); // Returns 16

26

int aligned2 = ByteArrayMethods.roundNumberOfBytesToNearestWord(24); // Returns 24

27

28

// High-performance array comparison

29

byte[] array1 = "Hello, World!".getBytes(StandardCharsets.UTF_8);

30

byte[] array2 = "Hello, World!".getBytes(StandardCharsets.UTF_8);

31

byte[] array3 = "Different".getBytes(StandardCharsets.UTF_8);

32

33

boolean equal1 = ByteArrayMethods.arrayEquals(

34

array1, Platform.BYTE_ARRAY_OFFSET,

35

array2, Platform.BYTE_ARRAY_OFFSET,

36

array1.length

37

); // true

38

39

boolean equal2 = ByteArrayMethods.arrayEquals(

40

array1, Platform.BYTE_ARRAY_OFFSET,

41

array3, Platform.BYTE_ARRAY_OFFSET,

42

array1.length

43

); // false

44

```

45

46

### Memory-Backed Long Arrays

47

48

```java

49

// Create memory block for long array

50

HeapMemoryAllocator allocator = new HeapMemoryAllocator();

51

MemoryBlock memory = allocator.allocate(80); // 10 longs * 8 bytes each

52

53

// Create long array backed by memory block

54

LongArray longArray = new LongArray(memory);

55

56

// Basic operations

57

long capacity = longArray.size(); // Number of longs this array can hold

58

System.out.println("Array capacity: " + capacity);

59

60

// Fill array with data

61

for (int i = 0; i < capacity; i++) {

62

longArray.set(i, i * 10L);

63

}

64

65

// Read data from array

66

for (int i = 0; i < capacity; i++) {

67

long value = longArray.get(i);

68

System.out.println("Index " + i + ": " + value);

69

}

70

71

// Zero out the entire array

72

longArray.zeroOut();

73

74

// Verify array is zeroed

75

for (int i = 0; i < capacity; i++) {

76

long value = longArray.get(i);

77

assert value == 0L;

78

}

79

80

// Clean up

81

allocator.free(memory);

82

```

83

84

### Advanced Array Operations

85

86

```java

87

// Working with memory-backed arrays and direct access

88

MemoryBlock block = allocator.allocate(1024);

89

LongArray array = new LongArray(block);

90

91

// Get direct memory access information

92

Object baseObject = array.getBaseObject();

93

long baseOffset = array.getBaseOffset();

94

MemoryBlock underlyingBlock = array.memoryBlock();

95

96

// Use Platform class for direct memory access

97

Platform.putLong(baseObject, baseOffset, 12345L);

98

long directValue = Platform.getLong(baseObject, baseOffset);

99

100

// Compare with array methods

101

array.set(0, 12345L);

102

long arrayValue = array.get(0);

103

104

assert directValue == arrayValue; // Both approaches yield same result

105

```

106

107

## API Reference

108

109

### ByteArrayMethods Class

110

111

```java { .api }

112

public class ByteArrayMethods {

113

/**

114

* Maximum safe array length for word-aligned arrays.

115

*/

116

public static final int MAX_ROUNDED_ARRAY_LENGTH;

117

118

/**

119

* Returns the next power of 2 greater than or equal to the input.

120

* For inputs already a power of 2, returns the input unchanged.

121

*/

122

public static long nextPowerOf2(long num);

123

124

/**

125

* Rounds byte count up to the nearest 8-byte (word) boundary.

126

*/

127

public static int roundNumberOfBytesToNearestWord(int numBytes);

128

129

/**

130

* Rounds byte count up to the nearest 8-byte (word) boundary.

131

*/

132

public static long roundNumberOfBytesToNearestWord(long numBytes);

133

134

/**

135

* High-performance byte array equality comparison using unsafe operations.

136

* Compares arrays in word-sized chunks for maximum performance.

137

*

138

* @param leftBase Base object for left array (array itself for heap arrays)

139

* @param leftOffset Offset within left base object

140

* @param rightBase Base object for right array

141

* @param rightOffset Offset within right base object

142

* @param length Number of bytes to compare

143

* @return true if arrays are equal, false otherwise

144

*/

145

public static boolean arrayEquals(Object leftBase, long leftOffset,

146

Object rightBase, long rightOffset, long length);

147

}

148

```

149

150

### LongArray Class

151

152

```java { .api }

153

public final class LongArray {

154

/**

155

* Creates a long array backed by the specified memory block.

156

* The memory block must be at least 8-byte aligned and have sufficient space.

157

*/

158

public LongArray(MemoryBlock memory);

159

160

/**

161

* Returns the underlying memory block backing this array.

162

*/

163

public MemoryBlock memoryBlock();

164

165

/**

166

* Returns the base object for direct memory access.

167

* For heap-allocated arrays, this is the underlying byte array.

168

* For off-heap arrays, this is null.

169

*/

170

public Object getBaseObject();

171

172

/**

173

* Returns the base offset for direct memory access.

174

*/

175

public long getBaseOffset();

176

177

/**

178

* Returns the number of long elements this array can hold.

179

* This is the memory block size divided by 8.

180

*/

181

public long size();

182

183

/**

184

* Fills the entire array with zeros using optimized memory operations.

185

*/

186

public void zeroOut();

187

188

/**

189

* Sets the value at the specified index.

190

*

191

* @param index Array index (0-based)

192

* @param value Long value to store

193

*/

194

public void set(int index, long value);

195

196

/**

197

* Gets the value at the specified index.

198

*

199

* @param index Array index (0-based)

200

* @return Long value at the specified index

201

*/

202

public long get(int index);

203

}

204

```

205

206

## Performance Characteristics

207

208

### ByteArrayMethods Performance

209

210

1. **Word-Aligned Comparison**: The `arrayEquals` method compares arrays in 8-byte chunks when possible, significantly faster than byte-by-byte comparison.

211

212

2. **SIMD Optimization**: On supported platforms, the JVM may use SIMD instructions for bulk operations.

213

214

3. **Cache Efficiency**: Word-aligned access patterns improve CPU cache utilization.

215

216

### LongArray Performance

217

218

1. **Direct Memory Access**: Bypasses array bounds checking for maximum performance.

219

220

2. **Memory Layout**: Uses contiguous memory layout for optimal cache performance.

221

222

3. **Bulk Operations**: The `zeroOut()` method uses optimized memory filling operations.

223

224

## Memory Management

225

226

### ByteArrayMethods

227

228

- No direct memory management required

229

- Works with existing arrays and memory regions

230

- Comparison operations don't allocate additional memory

231

232

### LongArray

233

234

- Backed by `MemoryBlock` which must be explicitly managed

235

- Does not own the underlying memory block

236

- Memory block must remain valid for the lifetime of the LongArray

237

- Caller responsible for freeing the underlying memory block

238

239

## Usage Notes

240

241

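1. **Bounds Checking**: LongArray validates indices only through Java `assert` statements, which are disabled unless the JVM is started with `-ea`. In production there is effectively no bounds checking, so callers must ensure every index is within `[0, size())`; an out-of-range index reads or writes arbitrary memory.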

242

243

2. **Memory Alignment**: LongArray requires 8-byte aligned memory blocks for correct operation.

244

245

3. **Thread Safety**: Neither ByteArrayMethods nor LongArray provide thread safety guarantees.

246

247

4. **Memory Block Lifetime**: Ensure the MemoryBlock backing a LongArray remains valid during array usage.

248

249

5. **Platform Dependencies**: Performance characteristics may vary across different JVM implementations and platforms.

250

251

## Common Patterns

252

253

### Safe Array Creation and Usage

254

255

```java

256

// Calculate required size with proper alignment

257

int numElements = 1000;

258

long requiredBytes = numElements * 8L; // 8 bytes per long

259

long alignedBytes = ByteArrayMethods.roundNumberOfBytesToNearestWord(requiredBytes);

260

261

// Allocate aligned memory

262

MemoryAllocator allocator = MemoryAllocator.HEAP;

263

MemoryBlock block = allocator.allocate(alignedBytes);

264

265

try {

266

LongArray array = new LongArray(block);

267

268

// Use array safely within calculated bounds

269

long actualCapacity = array.size();

270

for (int i = 0; i < Math.min(numElements, actualCapacity); i++) {

271

array.set(i, i);

272

}

273

274

// Process data...

275

} finally {

276

// Always clean up

277

allocator.free(block);

278

}

279

```

280

281

### Efficient Array Comparison

282

283

```java

284

// Compare arrays efficiently using word-aligned operations

285

public static boolean fastArrayEquals(byte[] a, byte[] b) {

286

if (a.length != b.length) {

287

return false;

288

}

289

290

return ByteArrayMethods.arrayEquals(

291

a, Platform.BYTE_ARRAY_OFFSET,

292

b, Platform.BYTE_ARRAY_OFFSET,

293

a.length

294

);

295

}

296

```

297

298

### Power-of-2 Buffer Sizing

299

300

```java

301

// Calculate optimal buffer size

302

int desiredSize = 1000;

303

long optimalSize = ByteArrayMethods.nextPowerOf2(desiredSize);

304

MemoryBlock buffer = allocator.allocate(optimalSize);

305

```

306

307

## Additional Array Utilities

308

309

### KVIterator Abstract Class

310

311

```java { .api }

312

/**

313

* Abstract base class for key-value iterators.

314

* Provides a common interface for iterating over key-value pairs.

315

*/

316

public abstract class KVIterator<K, V> {

317

/**

318

* Advances to the next key-value pair.

319

* @return true if there is a next pair, false if iteration is complete

320

* @throws IOException if an I/O error occurs during iteration

321

*/

322

public abstract boolean next() throws IOException;

323

324

/**

325

* Returns the current key.

326

* Must be called after a successful next() call.

327

* @return the current key

328

*/

329

public abstract K getKey();

330

331

/**

332

* Returns the current value.

333

* Must be called after a successful next() call.

334

* @return the current value

335

*/

336

public abstract V getValue();

337

338

/**

339

* Closes the iterator and releases any associated resources.

340

*/

341

public abstract void close();

342

}

343

```

344

345

### ByteArray Utilities

346

347

```java { .api }

348

public final class ByteArray {

349

/**

350

* Empty byte array constant.

351

*/

352

public static final byte[] EMPTY_BYTE;

353

354

/**

355

* Writes byte array content to specified memory location.

356

*

357

* @param src Source byte array

358

* @param target Target base object

359

* @param targetOffset Offset within target object

360

*/

361

public static void writeToMemory(byte[] src, Object target, long targetOffset);

362

363

/**

364

* Returns 64-bit prefix of byte array for sorting operations.

365

*

366

* @param bytes Input byte array

367

* @return 64-bit prefix value

368

*/

369

public static long getPrefix(byte[] bytes);

370

371

/**

372

* Extracts substring from byte array using SQL semantics.

373

*

374

* @param bytes Source byte array

375

* @param pos Starting position (1-based, SQL-style)

376

* @param len Length of substring

377

* @return Extracted byte array substring

378

*/

379

public static byte[] subStringSQL(byte[] bytes, int pos, int len);

380

381

/**

382

* Concatenates multiple byte arrays into a single array.

383

*

384

* @param inputs Variable number of byte arrays to concatenate

385

* @return Concatenated byte array

386

*/

387

public static byte[] concat(byte[]... inputs);

388

}

389

```

390

391

### Usage Examples for Additional Utilities

392

393

#### KVIterator Usage Pattern

394

395

```java

396

// Example implementation of KVIterator

397

public class SimpleKVIterator extends KVIterator<String, Integer> {

398

private final Map<String, Integer> data;

399

private final Iterator<Map.Entry<String, Integer>> iterator;

400

private Map.Entry<String, Integer> current;

401

402

public SimpleKVIterator(Map<String, Integer> data) {

403

this.data = data;

404

this.iterator = data.entrySet().iterator();

405

this.current = null;

406

}

407

408

@Override

409

public boolean next() {

410

if (iterator.hasNext()) {

411

current = iterator.next();

412

return true;

413

}

414

return false;

415

}

416

417

@Override

418

public String getKey() {

419

return current != null ? current.getKey() : null;

420

}

421

422

@Override

423

public Integer getValue() {

424

return current != null ? current.getValue() : null;

425

}

426

427

@Override

428

public void close() {

429

// Clean up resources if needed

430

current = null;

431

}

432

}

433

434

// Usage

435

Map<String, Integer> data = Map.of("a", 1, "b", 2, "c", 3);

436

KVIterator<String, Integer> iterator = new SimpleKVIterator(data);

437

438

while (iterator.next()) {

439

String key = iterator.getKey();

440

Integer value = iterator.getValue();

441

System.out.println(key + " -> " + value);

442

}

443

iterator.close();

444

```

445

446

#### ByteArray Operations

447

448

```java

449

// Working with ByteArray utilities

450

byte[] data1 = "Hello".getBytes(StandardCharsets.UTF_8);

451

byte[] data2 = "World".getBytes(StandardCharsets.UTF_8);

452

453

// Concatenate arrays

454

byte[] concatenated = ByteArray.concat(data1, " ".getBytes(StandardCharsets.UTF_8), data2);

455

String result = new String(concatenated, StandardCharsets.UTF_8); // "Hello World"

456

457

// Get prefix for sorting

458

long prefix1 = ByteArray.getPrefix(data1);

459

long prefix2 = ByteArray.getPrefix(data2);

460

int comparison = Long.compare(prefix1, prefix2);

461

462

// SQL-style substring

463

byte[] fullText = "Hello, World!".getBytes(StandardCharsets.UTF_8);

464

byte[] substring = ByteArray.subStringSQL(fullText, 8, 5); // "World" (1-based, length 5)

465

466

// Write to memory

467

MemoryAllocator allocator = MemoryAllocator.HEAP;

468

MemoryBlock block = allocator.allocate(concatenated.length);

469

try {

470

ByteArray.writeToMemory(concatenated, block.getBaseObject(), block.getBaseOffset());

471

// Data is now written to memory block

472

} finally {

473

allocator.free(block);

474

}

475

```