or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

array-operations.md hashing-utilities.md index.md memory-management.md platform-operations.md utf8-string-operations.md

array-operations.md docs/

0

# Array Operations

1

2

Efficient array implementations and utility methods for byte arrays and long arrays with support for both heap and off-heap memory storage, providing optimized operations for Spark's internal data processing needs.

3

4

## Capabilities

5

6

### LongArray Operations

7

8

Memory-efficient array of long values that can be backed by either heap or off-heap memory, designed for high-performance numerical operations.

9

10

```java { .api }

11

/**

12

* Array of long values supporting on-heap and off-heap memory

13

*/

14

final class LongArray {

15

/**

16

* Create LongArray backed by a memory block

17

* @param memory MemoryBlock providing storage

18

*/

19

public LongArray(MemoryBlock memory);

20

21

/**

22

* Get the underlying memory block

23

* @return MemoryBlock backing this array

24

*/

25

public MemoryBlock memoryBlock();

26

27

/**

28

* Get base object for memory access (null for off-heap)

29

* @return Base object or null

30

*/

31

public Object getBaseObject();

32

33

/**

34

* Get base offset within object or direct address

35

* @return Offset or address

36

*/

37

public long getBaseOffset();

38

39

/**

40

* Get number of long elements in array

41

* @return Array size in elements

42

*/

43

public long size();

44

45

/**

46

* Fill entire array with zeros

47

*/

48

public void zeroOut();

49

50

/**

51

* Set value at specified index

52

* @param index Element index

53

* @param value Long value to store

54

*/

55

public void set(int index, long value);

56

57

/**

58

* Get value at specified index

59

* @param index Element index

60

* @return Long value at index

61

*/

62

public long get(int index);

63

}

64

```

65

66

**Usage Examples:**

67

68

```java

69

import org.apache.spark.unsafe.array.LongArray;

70

import org.apache.spark.unsafe.memory.*;

71

72

// Create LongArray with heap storage

73

MemoryAllocator allocator = MemoryAllocator.HEAP;

74

MemoryBlock block = allocator.allocate(800); // 100 longs * 8 bytes

75

LongArray array = new LongArray(block);

76

77

try {

78

// Initialize array

79

array.zeroOut();

80

81

// Set values

82

for (int i = 0; i < 10; i++) {

83

array.set(i, i * 10L);

84

}

85

86

// Read values

87

for (int i = 0; i < 10; i++) {

88

long value = array.get(i);

89

System.out.println("array[" + i + "] = " + value);

90

}

91

92

// Get array properties

93

long size = array.size();

94

Object baseObj = array.getBaseObject();

95

96

} finally {

97

allocator.free(block);

98

}

99

100

// Create LongArray with off-heap storage

101

MemoryAllocator unsafeAllocator = MemoryAllocator.UNSAFE;

102

MemoryBlock offHeapBlock = unsafeAllocator.allocate(800);

103

LongArray offHeapArray = new LongArray(offHeapBlock);

104

// ... use array ...

105

unsafeAllocator.free(offHeapBlock);

106

```

107

108

### ByteArrayMethods Utilities

109

110

Comprehensive utility methods for byte array operations including comparisons, searching, and memory-related calculations.

111

112

```java { .api }

113

/**

114

* Utility methods for byte array operations with unsafe optimizations

115

*/

116

class ByteArrayMethods {

117

118

// Constants

119

/**

120

* Maximum safe array length for rounded arrays

121

*/

122

public static final int MAX_ROUNDED_ARRAY_LENGTH;

123

124

// Memory alignment utilities

125

/**

126

* Get next power of 2 greater than or equal to num

127

* @param num Input number

128

* @return Next power of 2

129

*/

130

public static long nextPowerOf2(long num);

131

132

/**

133

* Round number of bytes up to the next 8-byte word boundary

134

* @param numBytes Number of bytes

135

* @return Rounded byte count

136

*/

137

public static int roundNumberOfBytesToNearestWord(int numBytes);

138

139

/**

140

* Round number of bytes up to the next 8-byte word boundary (long version)

141

* @param numBytes Number of bytes

142

* @return Rounded byte count

143

*/

144

public static long roundNumberOfBytesToNearestWord(long numBytes);

145

146

// Array comparison operations

147

/**

148

* Compare two byte arrays for equality using unsafe operations

149

* @param leftBase Left array base object (null for off-heap)

150

* @param leftOffset Left array offset or address

151

* @param rightBase Right array base object (null for off-heap)

152

* @param rightOffset Right array offset or address

153

* @param length Number of bytes to compare

154

* @return true if arrays are equal

155

*/

156

public static boolean arrayEquals(Object leftBase, long leftOffset, Object rightBase, long rightOffset, long length);

157

158

// Array search operations

159

/**

160

* Check if byte array contains sub-array

161

* @param arr Main byte array

162

* @param sub Sub-array to search for

163

* @return true if sub-array is found

164

*/

165

public static boolean contains(byte[] arr, byte[] sub);

166

167

/**

168

* Check if array starts with target sub-array

169

* @param array Main byte array

170

* @param target Target sub-array

171

* @return true if array starts with target

172

*/

173

public static boolean startsWith(byte[] array, byte[] target);

174

175

/**

176

* Check if array ends with target sub-array

177

* @param array Main byte array

178

* @param target Target sub-array

179

* @return true if array ends with target

180

*/

181

public static boolean endsWith(byte[] array, byte[] target);

182

183

/**

184

* Check if sub-array matches at specific position

185

* @param arr Main byte array

186

* @param sub Sub-array to match

187

* @param pos Position to check match

188

* @return true if sub-array matches at position

189

*/

190

public static boolean matchAt(byte[] arr, byte[] sub, int pos);

191

}

192

```

193

194

**Usage Examples:**

195

196

```java

197

import org.apache.spark.unsafe.array.ByteArrayMethods;

198

199

// Memory alignment calculations

200

long size = 1000;

201

long powerOf2 = ByteArrayMethods.nextPowerOf2(size); // Next power of 2 >= 1000

202

int alignedSize = ByteArrayMethods.roundNumberOfBytesToNearestWord(37); // Rounds up to 40 (next multiple of 8)

203

204

// Array comparisons using unsafe operations

205

byte[] array1 = "Hello World".getBytes();

206

byte[] array2 = "Hello World".getBytes();

207

208

boolean areEqual = ByteArrayMethods.arrayEquals(

209

array1, Platform.BYTE_ARRAY_OFFSET,

210

array2, Platform.BYTE_ARRAY_OFFSET,

211

array1.length

212

);

213

214

// Array searching operations

215

byte[] mainArray = "Hello World Program".getBytes();

216

byte[] searchFor = "World".getBytes();

217

218

boolean contains = ByteArrayMethods.contains(mainArray, searchFor);

219

boolean starts = ByteArrayMethods.startsWith(mainArray, "Hello".getBytes());

220

boolean ends = ByteArrayMethods.endsWith(mainArray, "Program".getBytes());

221

boolean matches = ByteArrayMethods.matchAt(mainArray, "World".getBytes(), 6);

222

223

System.out.println("Contains 'World': " + contains);

224

System.out.println("Starts with 'Hello': " + starts);

225

System.out.println("Ends with 'Program': " + ends);

226

System.out.println("'World' at position 6: " + matches);

227

```

228

229

### ByteArray Utilities

230

231

Specialized utilities for byte array operations including comparison, manipulation, and SQL-style string operations.

232

233

```java { .api }

234

/**

235

* Utility methods for byte array operations and comparisons

236

*/

237

final class ByteArray {

238

239

// Constants

240

/**

241

* Empty byte array constant

242

*/

243

public static final byte[] EMPTY_BYTE;

244

245

// Memory operations

246

/**

247

* Write byte array to memory location

248

* @param src Source byte array

249

* @param target Target object (null for off-heap)

250

* @param targetOffset Target offset or address

251

*/

252

public static void writeToMemory(byte[] src, Object target, long targetOffset);

253

254

/**

255

* Get sorting prefix from byte array

256

* @param bytes Byte array

257

* @return Long prefix for sorting

258

*/

259

public static long getPrefix(byte[] bytes);

260

261

// Comparison operations

262

/**

263

* Compare two byte arrays lexicographically

264

* @param leftBase Left byte array

265

* @param rightBase Right byte array

266

* @return Comparison result (negative, zero, positive)

267

*/

268

public static int compareBinary(byte[] leftBase, byte[] rightBase);

269

270

// String-like operations

271

/**

272

* Extract substring from byte array (SQL-style, 1-based)

273

* @param bytes Source byte array

274

* @param pos Starting position (1-based)

275

* @param len Length of substring

276

* @return Substring as byte array

277

*/

278

public static byte[] subStringSQL(byte[] bytes, int pos, int len);

279

280

/**

281

* Concatenate multiple byte arrays

282

* @param inputs Byte arrays to concatenate

283

* @return Concatenated byte array

284

*/

285

public static byte[] concat(byte[]... inputs);

286

287

/**

288

* Left pad byte array to specified length

289

* @param bytes Source byte array

290

* @param len Target length

291

* @param pad Padding bytes

292

* @return Left-padded byte array

293

*/

294

public static byte[] lpad(byte[] bytes, int len, byte[] pad);

295

296

/**

297

* Right pad byte array to specified length

298

* @param bytes Source byte array

299

* @param len Target length

300

* @param pad Padding bytes

301

* @return Right-padded byte array

302

*/

303

public static byte[] rpad(byte[] bytes, int len, byte[] pad);

304

}

305

```

306

307

**Usage Examples:**

308

309

```java

310

import org.apache.spark.unsafe.types.ByteArray;

311

import org.apache.spark.unsafe.Platform;

312

313

// Memory operations

314

byte[] data = "Hello".getBytes();

315

byte[] buffer = new byte[100];

316

317

ByteArray.writeToMemory(data, buffer, Platform.BYTE_ARRAY_OFFSET);

318

long prefix = ByteArray.getPrefix(data);

319

320

// Array comparison

321

byte[] array1 = "apple".getBytes();

322

byte[] array2 = "banana".getBytes();

323

int comparison = ByteArray.compareBinary(array1, array2); // negative

324

325

// String-like operations on byte arrays

326

byte[] source = "Hello World".getBytes();

327

328

// Extract substring (SQL-style, 1-based indexing)

329

byte[] substring = ByteArray.subStringSQL(source, 7, 5); // "World"

330

331

// Concatenation

332

byte[] part1 = "Hello".getBytes();

333

byte[] part2 = " ".getBytes();

334

byte[] part3 = "World".getBytes();

335

byte[] concatenated = ByteArray.concat(part1, part2, part3);

336

337

// Padding operations

338

byte[] text = "Hi".getBytes();

339

byte[] spaces = " ".getBytes();

340

byte[] leftPadded = ByteArray.lpad(text, 10, spaces); // "        Hi" (8 pad spaces, then "Hi")

341

byte[] rightPadded = ByteArray.rpad(text, 10, spaces); // "Hi        " ("Hi", then 8 pad spaces)

342

343

// Working with empty arrays

344

byte[] empty = ByteArray.EMPTY_BYTE;

345

System.out.println("Empty array length: " + empty.length);

346

```

347

348

## Performance Optimizations

349

350

### Memory Alignment

351

352

ByteArrayMethods provides utilities for optimizing memory access through proper alignment:

353

354

```java

355

// Calculate optimal buffer sizes

356

int originalSize = 1000;

357

int wordAligned = ByteArrayMethods.roundNumberOfBytesToNearestWord(originalSize);

358

359

// Use power-of-2 sizes for better memory allocation

360

long optimalSize = ByteArrayMethods.nextPowerOf2(originalSize);

361

```

362

363

### Unsafe Array Comparisons

364

365

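For maximum performance, use the unsafe array comparison methods. Note that `arrayEquals` compares exactly `length` bytes, so passing `Math.min(array1.length, array2.length)` only verifies a common prefix; check that the array lengths match first when full equality is required: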

366

367

```java

368

import org.apache.spark.unsafe.Platform;

369

import org.apache.spark.unsafe.array.ByteArrayMethods;

370

371

// Fast array comparison using unsafe operations

372

byte[] array1 = getData1();

373

byte[] array2 = getData2();

374

375

boolean equal = ByteArrayMethods.arrayEquals(

376

array1, Platform.BYTE_ARRAY_OFFSET,

377

array2, Platform.BYTE_ARRAY_OFFSET,

378

Math.min(array1.length, array2.length)

379

);

380

```

381

382

### Memory-Efficient Array Creation

383

384

When working with LongArray, consider memory allocation strategy:

385

386

```java

387

// For temporary arrays, use heap allocation

388

MemoryAllocator heapAllocator = MemoryAllocator.HEAP;

389

MemoryBlock heapBlock = heapAllocator.allocate(elementCount * 8);

390

LongArray heapArray = new LongArray(heapBlock);

391

392

// For long-lived arrays, consider off-heap allocation

393

MemoryAllocator offHeapAllocator = MemoryAllocator.UNSAFE;

394

MemoryBlock offHeapBlock = offHeapAllocator.allocate(elementCount * 8);

395

LongArray offHeapArray = new LongArray(offHeapBlock);

396

```

397

398

## Memory Management Best Practices

399

400

### Resource Cleanup

401

402

Always ensure proper cleanup of allocated memory:

403

404

```java

405

import org.apache.spark.unsafe.array.LongArray;

406

import org.apache.spark.unsafe.memory.*;

407

408

public void processLongArray(int size) {

409

MemoryAllocator allocator = MemoryAllocator.HEAP;

410

MemoryBlock block = null;

411

412

try {

413

block = allocator.allocate(size * 8);

414

LongArray array = new LongArray(block);

415

416

// Use the array

417

array.zeroOut();

418

for (int i = 0; i < size; i++) {

419

array.set(i, i * 2L);

420

}

421

422

// Process data

423

processData(array);

424

425

} catch (OutOfMemoryError e) {

426

System.err.println("Failed to allocate array: " + e.getMessage());

427

} finally {

428

if (block != null) {

429

allocator.free(block);

430

}

431

}

432

}

433

```

434

435

### Array Size Considerations

436

437

Consider maximum array size limits:

438

439

```java

440

// Check against maximum safe array length

441

int requestedSize = calculateRequiredSize();

442

if (requestedSize > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) {

443

throw new IllegalArgumentException("Array size too large: " + requestedSize);

444

}

445

446

int safeSize = ByteArrayMethods.roundNumberOfBytesToNearestWord(requestedSize);

447

```

448

449

## Integration with Other Components

450

451

### Working with Platform Operations

452

453

Array operations integrate seamlessly with Platform methods:

454

455

```java

456

import org.apache.spark.unsafe.Platform;

457

import org.apache.spark.unsafe.array.LongArray;

458

459

LongArray array = createLongArray(100);

460

461

// Direct memory access using Platform

462

Object baseObj = array.getBaseObject();

463

long baseOffset = array.getBaseOffset();

464

465

// Set value using Platform (equivalent to array.set(0, 42L))

466

Platform.putLong(baseObj, baseOffset, 42L);

467

468

// Get value using Platform (equivalent to array.get(0))

469

long value = Platform.getLong(baseObj, baseOffset);

470

```

471

472

### Memory Block Integration

473

474

LongArray works directly with MemoryBlock instances:

475

476

```java

477

// Create from existing MemoryBlock

478

MemoryBlock existingBlock = allocateMemoryBlock(800);

479

LongArray array = new LongArray(existingBlock);

480

481

// Access underlying MemoryBlock

482

MemoryBlock block = array.memoryBlock();

483

long blockSize = block.size();

484

int pageNumber = block.pageNumber;

485

```