or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

exception-handling.md, index.md, java-api-functions.md, logging.md, network-utilities.md, storage-configuration.md

docs/java-api-functions.md

0

# Java API Integration

1

2

Functional interfaces from Spark's Java API (`org.apache.spark.api.java.function`) that let Java developers pass type-safe lambda expressions and method references to Spark operations.

3

4

## Capabilities

5

6

### Core Function Interfaces

7

8

Base functional interfaces for common transformation and action operations.

9

10

```java { .api }

11

/**

12

* Base single-argument function interface

13

* @param <T1> - Input type

14

* @param <R> - Return type

15

*/

16

@FunctionalInterface

17

public interface Function<T1, R> extends Serializable {

18

/**

19

* Apply function to input value

20

* @param v1 - Input value

21

* @return Transformed result

22

* @throws Exception - Any exception during function execution

23

*/

24

R call(T1 v1) throws Exception;

25

}

26

27

/**

28

* No-argument function interface

29

* @param <R> - Return type

30

*/

31

@FunctionalInterface

32

public interface Function0<R> extends Serializable {

33

/**

34

* Apply function with no arguments

35

* @return Function result

36

* @throws Exception - Any exception during function execution

37

*/

38

R call() throws Exception;

39

}

40

41

/**

42

* Two-argument function interface

43

* @param <T1> - First input type

44

* @param <T2> - Second input type

45

* @param <R> - Return type

46

*/

47

@FunctionalInterface

48

public interface Function2<T1, T2, R> extends Serializable {

49

/**

50

* Apply function to two input values

51

* @param v1 - First input value

52

* @param v2 - Second input value

53

* @return Transformed result

54

* @throws Exception - Any exception during function execution

55

*/

56

R call(T1 v1, T2 v2) throws Exception;

57

}

58

59

/**

60

* Three-argument function interface

61

* @param <T1> - First input type

62

* @param <T2> - Second input type

63

* @param <T3> - Third input type

64

* @param <R> - Return type

65

*/

66

@FunctionalInterface

67

public interface Function3<T1, T2, T3, R> extends Serializable {

68

/**

69

* Apply function to three input values

70

* @param v1 - First input value

71

* @param v2 - Second input value

72

* @param v3 - Third input value

73

* @return Transformed result

74

* @throws Exception - Any exception during function execution

75

*/

76

R call(T1 v1, T2 v2, T3 v3) throws Exception;

77

}

78

79

/**

80

* Four-argument function interface

81

* @param <T1> - First input type

82

* @param <T2> - Second input type

83

* @param <T3> - Third input type

84

* @param <T4> - Fourth input type

85

* @param <R> - Return type

86

*/

87

@FunctionalInterface

88

public interface Function4<T1, T2, T3, T4, R> extends Serializable {

89

/**

90

* Apply function to four input values

91

* @param v1 - First input value

92

* @param v2 - Second input value

93

* @param v3 - Third input value

94

* @param v4 - Fourth input value

95

* @return Transformed result

96

* @throws Exception - Any exception during function execution

97

*/

98

R call(T1 v1, T2 v2, T3 v3, T4 v4) throws Exception;

99

}

100

```

101

102

### Specialized Function Interfaces

103

104

Functional interfaces for specific Spark operations such as pair RDD creation, flat mapping, filtering, reduction, and side-effecting actions that return no value.

105

106

```java { .api }

107

/**

108

* Function that returns key-value pairs for creating PairRDDs

109

* @param <T> - Input type

110

* @param <K> - Key type

111

* @param <V> - Value type

112

*/

113

@FunctionalInterface

114

public interface PairFunction<T, K, V> extends Serializable {

115

/**

116

* Returns a key-value pair from input

117

* @param t - Input value

118

* @return Tuple2 containing key and value

119

* @throws Exception - Any exception during function execution

120

*/

121

Tuple2<K, V> call(T t) throws Exception;

122

}

123

124

/**

125

* Function that returns zero or more output records from each input record

126

* @param <T> - Input type

127

* @param <R> - Output type

128

*/

129

@FunctionalInterface

130

public interface FlatMapFunction<T, R> extends Serializable {

131

/**

132

* Returns iterator of output records

133

* @param t - Input value

134

* @return Iterator over output values

135

* @throws Exception - Any exception during function execution

136

*/

137

Iterator<R> call(T t) throws Exception;

138

}

139

140

/**

141

* Function with no return value for actions

142

* @param <T> - Input type

143

*/

144

@FunctionalInterface

145

public interface VoidFunction<T> extends Serializable {

146

/**

147

* Execute action on input value

148

* @param t - Input value

149

* @throws Exception - Any exception during function execution

150

*/

151

void call(T t) throws Exception;

152

}

153

154

/**

155

* Two-argument void function for actions

156

* @param <T1> - First input type

157

* @param <T2> - Second input type

158

*/

159

@FunctionalInterface

160

public interface VoidFunction2<T1, T2> extends Serializable {

161

/**

162

* Execute action on two input values

163

* @param v1 - First input value

164

* @param v2 - Second input value

165

* @throws Exception - Any exception during function execution

166

*/

167

void call(T1 v1, T2 v2) throws Exception;

168

}

169

170

/**

171

* Function for filtering operations

172

* @param <T> - Input type

173

*/

174

@FunctionalInterface

175

public interface FilterFunction<T> extends Serializable {

176

/**

177

* Test whether input should be included

178

* @param value - Input value to test

179

* @return true if value should be included

180

* @throws Exception - Any exception during function execution

181

*/

182

boolean call(T value) throws Exception;

183

}

184

185

/**

186

* Function for reducing operations

187

* @param <T> - Input and output type

188

*/

189

@FunctionalInterface

190

public interface ReduceFunction<T> extends Serializable {

191

/**

192

* Combine two values into one

193

* @param v1 - First value

194

* @param v2 - Second value

195

* @return Combined result

196

* @throws Exception - Any exception during function execution

197

*/

198

T call(T v1, T v2) throws Exception;

199

}

200

201

/**

202

* Function for operations on each partition

203

* @param <T> - Input type

204

*/

205

@FunctionalInterface

206

public interface ForeachPartitionFunction<T> extends Serializable {

207

/**

208

* Execute action on partition iterator

209

* @param t - Iterator over partition elements

210

* @throws Exception - Any exception during function execution

211

*/

212

void call(Iterator<T> t) throws Exception;

213

}

214

```

215

216

**Usage Examples:**

217

218

```java

219

import org.apache.spark.api.java.function.*;

220

import org.apache.spark.api.java.JavaRDD;

221

222

// Single-argument function for map operations

223

Function<String, Integer> stringLength = s -> s.length();

224

JavaRDD<Integer> lengths = stringRDD.map(stringLength);

225

226

// Two-argument function for reduce operations

227

Function2<Integer, Integer, Integer> sum = (a, b) -> a + b;

228

int total = numberRDD.reduce(sum);

229

230

// No-argument function for suppliers

231

Function0<String> currentTime = () -> java.time.Instant.now().toString();

232

233

// Multi-argument functions for complex operations

234

Function3<String, Integer, Boolean, String> formatter =

235

(str, num, flag) -> flag ? str.toUpperCase() + num : str + num;

236

```

237

238

### Void Functions

239

240

Functions that perform side effects without returning values, commonly used for actions like foreach.

241

242

```java { .api }

243

/**

244

* Function with no return value (void function)

245

* @param <T> - Input type

246

*/

247

@FunctionalInterface

248

public interface VoidFunction<T> extends Serializable {

249

/**

250

* Apply function to input value with no return

251

* @param t - Input value

252

* @throws Exception - Any exception during function execution

253

*/

254

void call(T t) throws Exception;

255

}

256

257

/**

258

* Two-argument function with no return value

259

* @param <T1> - First input type

260

* @param <T2> - Second input type

261

*/

262

@FunctionalInterface

263

public interface VoidFunction2<T1, T2> extends Serializable {

264

/**

265

* Apply function to two input values with no return

266

* @param t1 - First input value

267

* @param t2 - Second input value

268

* @throws Exception - Any exception during function execution

269

*/

270

void call(T1 t1, T2 t2) throws Exception;

271

}

272

```

273

274

**Usage Examples:**

275

276

```java

277

import org.apache.spark.api.java.function.*;

278

279

// Void function for foreach operations

280

VoidFunction<String> printString = s -> System.out.println(s);

281

stringRDD.foreach(printString);

282

283

// Void function for side effects

284

VoidFunction<Integer> incrementCounter = i -> {

285

counter.addAndGet(i);

286

logger.info("Processed: " + i);

287

};

288

numberRDD.foreach(incrementCounter);

289

290

// Two-argument void function

291

VoidFunction2<String, Integer> logPair = (key, value) -> {

292

System.out.println("Key: " + key + ", Value: " + value);

293

};

294

```

295

296

### Specialized Transformation Functions

297

298

Functions designed for specific Spark operations like mapping, filtering, and flat mapping.

299

300

```java { .api }

301

/**

302

* Function for mapping transformations

303

* @param <T> - Input type

304

* @param <R> - Output type

305

*/

306

@FunctionalInterface

307

public interface MapFunction<T, R> extends Serializable {

308

/**

309

* Transform input value to output value

310

* @param value - Input value

311

* @return Transformed value

312

* @throws Exception - Any exception during transformation

313

*/

314

R call(T value) throws Exception;

315

}

316

317

/**

318

* Function for filtering operations

319

* @param <T> - Input type

320

*/

321

@FunctionalInterface

322

public interface FilterFunction<T> extends Serializable {

323

/**

324

* Test if value should be included in result

325

* @param value - Input value to test

326

* @return true if value should be included, false otherwise

327

* @throws Exception - Any exception during filtering

328

*/

329

boolean call(T value) throws Exception;

330

}

331

332

/**

333

* Function for flat mapping operations (one-to-many)

334

* @param <T> - Input type

335

* @param <R> - Output element type

336

*/

337

@FunctionalInterface

338

public interface FlatMapFunction<T, R> extends Serializable {

339

/**

340

* Transform single input to iterator of outputs

341

* @param t - Input value

342

* @return Iterator of output values

343

* @throws Exception - Any exception during transformation

344

*/

345

Iterator<R> call(T t) throws Exception;

346

}

347

348

/**

349

* Function for flat mapping operations with two arguments

350

* @param <A> - First input type

351

* @param <B> - Second input type

352

* @param <R> - Output element type

353

*/

354

@FunctionalInterface

355

public interface FlatMapFunction2<A, B, R> extends Serializable {

356

/**

357

* Transform two inputs to iterator of outputs

358

* @param a - First input value

359

* @param b - Second input value

360

* @return Iterator of output values

361

* @throws Exception - Any exception during transformation

362

*/

363

Iterator<R> call(A a, B b) throws Exception;

364

}

365

```

366

367

**Usage Examples:**

368

369

```java

370

import org.apache.spark.api.java.function.*;

371

import java.util.*;

372

373

// Map function for Dataset operations

374

MapFunction<Person, String> getName = person -> person.getName();

375

Dataset<String> names = personDataset.map(getName, Encoders.STRING());

376

377

// Filter function

378

FilterFunction<Integer> isEven = i -> i % 2 == 0;

379

Dataset<Integer> evenNumbers = numberDataset.filter(isEven);

380

381

// Flat map function - split strings into words

382

FlatMapFunction<String, String> splitWords = line -> {

383

return Arrays.asList(line.split(" ")).iterator();

384

};

385

JavaRDD<String> words = linesRDD.flatMap(splitWords);

386

387

// Two-argument flat map

388

FlatMapFunction2<String, String, String> combineAndSplit = (s1, s2) -> {

389

String combined = s1 + " " + s2;

390

return Arrays.asList(combined.split(" ")).iterator();

391

};

392

```

393

394

### Pair Functions

395

396

Functions that produce key-value pairs (`Tuple2`), used by `mapToPair` and `flatMapToPair` to build the pair RDDs that operations like groupByKey and reduceByKey require.

397

398

```java { .api }

399

/**

400

* Function that produces key-value pairs (Tuple2)

401

* @param <T> - Input type

402

* @param <K> - Key type

403

* @param <V> - Value type

404

*/

405

@FunctionalInterface

406

public interface PairFunction<T, K, V> extends Serializable {

407

/**

408

* Transform input to key-value pair

409

* @param t - Input value

410

* @return Tuple2 containing key and value

411

* @throws Exception - Any exception during transformation

412

*/

413

Tuple2<K, V> call(T t) throws Exception;

414

}

415

416

/**

417

* Flat map function that produces key-value pairs

418

* @param <T> - Input type

419

* @param <K> - Key type

420

* @param <V> - Value type

421

*/

422

@FunctionalInterface

423

public interface PairFlatMapFunction<T, K, V> extends Serializable {

424

/**

425

* Transform input to iterator of key-value pairs

426

* @param t - Input value

427

* @return Iterator of Tuple2 containing keys and values

428

* @throws Exception - Any exception during transformation

429

*/

430

Iterator<Tuple2<K, V>> call(T t) throws Exception;

431

}

432

```

433

434

**Usage Examples:**

435

436

```java

437

import org.apache.spark.api.java.function.*;

438

import scala.Tuple2;

439

440

// Pair function for creating key-value pairs

441

PairFunction<String, String, Integer> wordToPair =

442

word -> new Tuple2<>(word, 1);

443

JavaPairRDD<String, Integer> wordCounts = wordsRDD.mapToPair(wordToPair);

444

445

// Pair flat map function

446

PairFlatMapFunction<String, String, Integer> lineToPairs = line -> {

447

List<Tuple2<String, Integer>> pairs = new ArrayList<>();

448

for (String word : line.split(" ")) {

449

pairs.add(new Tuple2<>(word, 1));

450

}

451

return pairs.iterator();

452

};

453

JavaPairRDD<String, Integer> wordPairs = linesRDD.flatMapToPair(lineToPairs);

454

```

455

456

### Reduction Functions

457

458

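Functions for aggregation and reduction operations on Datasets (`Dataset.reduce`). RDD reductions (`JavaRDD.reduce`) take a `Function2<T, T, T>` instead.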

459

460

```java { .api }

461

/**

462

* Function for reduction operations

463

* @param <T> - Type being reduced

464

*/

465

@FunctionalInterface

466

public interface ReduceFunction<T> extends Serializable {

467

/**

468

* Combine two values into one

469

* @param v1 - First value

470

* @param v2 - Second value

471

* @return Combined result

472

* @throws Exception - Any exception during reduction

473

*/

474

T call(T v1, T v2) throws Exception;

475

}

476

```

477

478

**Usage Examples:**

479

480

```java

481

import org.apache.spark.api.java.function.*;

482

483

// Reduction function for summing integers

484

ReduceFunction<Integer> sum = (a, b) -> a + b;

485

int total = numberDataset.reduce(sum); // Dataset<Integer>; JavaRDD.reduce takes a Function2 instead

486

487

// Reduction function for finding maximum

488

ReduceFunction<Double> max = (a, b) -> Math.max(a, b);

489

double maximum = doubleDataset.reduce(max); // Dataset<Double>

490

491

// Reduction function for string concatenation

492

ReduceFunction<String> concat = (s1, s2) -> s1 + " " + s2;

493

String combined = stringDataset.reduce(concat); // Dataset<String>

494

```

495

496

### Specialized Numeric Functions

497

498

Functions that produce `double` values, used by `mapToDouble` and `flatMapToDouble` to build a `JavaDoubleRDD`.

499

500

```java { .api }

501

/**

502

* Function that returns double values

503

* @param <T> - Input type

504

*/

505

@FunctionalInterface

506

public interface DoubleFunction<T> extends Serializable {

507

/**

508

* Transform input to double value

509

* @param t - Input value

510

* @return Double result

511

* @throws Exception - Any exception during transformation

512

*/

513

double call(T t) throws Exception;

514

}

515

516

/**

517

* Flat map function that returns double values

518

* @param <T> - Input type

519

*/

520

@FunctionalInterface

521

public interface DoubleFlatMapFunction<T> extends Serializable {

522

/**

523

* Transform input to iterator of double values

524

* @param t - Input value

525

* @return Iterator of double values

526

* @throws Exception - Any exception during transformation

527

*/

528

Iterator<Double> call(T t) throws Exception;

529

}

530

```

531

532

**Usage Examples:**

533

534

```java

535

import org.apache.spark.api.java.function.*;

536

537

// Double function for numeric extraction

538

DoubleFunction<String> parseDouble = s -> Double.parseDouble(s);

539

JavaDoubleRDD doubleRDD = stringRDD.mapToDouble(parseDouble);

540

541

// Double flat map function

542

DoubleFlatMapFunction<String> extractNumbers = line -> {

543

List<Double> numbers = new ArrayList<>();

544

for (String token : line.split(" ")) {

545

try {

546

numbers.add(Double.parseDouble(token));

547

} catch (NumberFormatException e) {

548

// Skip non-numeric tokens

549

}

550

}

551

return numbers.iterator();

552

};

553

```

554

555

### Advanced Grouping Functions

556

557

Functions for advanced grouping and co-grouping operations.

558

559

```java { .api }

560

/**
* Function for co-grouping operations on two grouped Datasets
* @param <K> - Grouping key type
* @param <V1> - First value type
* @param <V2> - Second value type
* @param <R> - Result element type
*/
@FunctionalInterface
public interface CoGroupFunction<K, V1, V2, R> extends Serializable {
/**
* Process the values that share a key in both groups
* @param key - Grouping key
* @param left - Iterator of first group values
* @param right - Iterator of second group values
* @return Iterator of processing results
* @throws Exception - Any exception during processing
*/
Iterator<R> call(K key, Iterator<V1> left, Iterator<V2> right) throws Exception;
}

577

578

/**

579

* Function for mapping grouped data

580

* @param <K> - Key type

581

* @param <V> - Value type

582

* @param <R> - Result type

583

*/

584

@FunctionalInterface

585

public interface MapGroupsFunction<K, V, R> extends Serializable {

586

/**

587

* Process grouped values for a key

588

* @param key - Group key

589

* @param values - Iterator of values for the key

590

* @return Processing result

591

* @throws Exception - Any exception during processing

592

*/

593

R call(K key, Iterator<V> values) throws Exception;

594

}

595

596

/**

597

* Function for flat mapping grouped data

598

* @param <K> - Key type

599

* @param <V> - Value type

600

* @param <R> - Result element type

601

*/

602

@FunctionalInterface

603

public interface FlatMapGroupsFunction<K, V, R> extends Serializable {

604

/**

605

* Process grouped values and return iterator of results

606

* @param key - Group key

607

* @param values - Iterator of values for the key

608

* @return Iterator of processing results

609

* @throws Exception - Any exception during processing

610

*/

611

Iterator<R> call(K key, Iterator<V> values) throws Exception;

612

}

613

```

614

615

**Usage Examples:**

616

617

```java

618

import org.apache.spark.api.java.function.*;

619

620

// Map groups function for aggregation

621

MapGroupsFunction<String, Integer, Double> computeAverage = (key, values) -> {

622

int sum = 0;

623

int count = 0;

624

while (values.hasNext()) {

625

sum += values.next();

626

count++;

627

}

628

return count > 0 ? (double) sum / count : 0.0;

629

};

630

631

// Flat map groups function for expansion

632

FlatMapGroupsFunction<String, Person, String> extractEmails = (department, people) -> {

633

List<String> emails = new ArrayList<>();

634

while (people.hasNext()) {

635

Person person = people.next();

636

if (person.getEmail() != null) {

637

emails.add(person.getEmail());

638

}

639

}

640

return emails.iterator();

641

};

642

```

643

644

### Partition and Action Functions

645

646

Functions for partition-wise operations and actions.

647

648

```java { .api }

649

/**

650

* Function for mapping entire partitions

651

* @param <T> - Input element type

652

* @param <R> - Output element type

653

*/

654

@FunctionalInterface

655

public interface MapPartitionsFunction<T, R> extends Serializable {

656

/**

657

* Process entire partition

658

* @param input - Iterator of partition elements

659

* @return Iterator of results

660

* @throws Exception - Any exception during processing

661

*/

662

Iterator<R> call(Iterator<T> input) throws Exception;

663

}

664

665

/**

666

* Function for foreach operations on elements

667

* @param <T> - Input type

668

*/

669

@FunctionalInterface

670

public interface ForeachFunction<T> extends Serializable {

671

/**

672

* Process single element (side effect)

673

* @param t - Input element

674

* @throws Exception - Any exception during processing

675

*/

676

void call(T t) throws Exception;

677

}

678

679

/**

680

* Function for foreach operations on partitions

681

* @param <T> - Input element type

682

*/

683

@FunctionalInterface

684

public interface ForeachPartitionFunction<T> extends Serializable {

685

/**

686

* Process entire partition (side effect)

687

* @param t - Iterator of partition elements

688

* @throws Exception - Any exception during processing

689

*/

690

void call(Iterator<T> t) throws Exception;

691

}

692

```

693

694

**Usage Examples:**

695

696

```java

697

import org.apache.spark.api.java.function.*;

698

699

// Map partitions function for batch processing

700

MapPartitionsFunction<String, String> processPartition = partition -> {

701

List<String> results = new ArrayList<>();

702

BatchProcessor processor = new BatchProcessor();

703

704

while (partition.hasNext()) {

705

results.add(processor.process(partition.next()));

706

}

707

708

processor.close();

709

return results.iterator();

710

};

711

712

// Foreach function for side effects

713

ForeachFunction<String> writeToFile = line -> {

714

fileWriter.write(line + "\n");

715

};

716

717

// Foreach partition function for batch side effects

718

ForeachPartitionFunction<Record> batchInsert = records -> {

719

DatabaseConnection conn = getConnection();

720

PreparedStatement stmt = conn.prepareStatement("INSERT INTO table VALUES (?)");

721

722

while (records.hasNext()) {

723

stmt.setString(1, records.next().getValue());

724

stmt.addBatch();

725

}

726

727

stmt.executeBatch();

728

conn.close();

729

};

730

```

731

732

## Type Definitions

733

734

```java { .api }

735

// Core function interfaces

736

@FunctionalInterface

737

interface Function<T1, R> extends Serializable {

738

R call(T1 v1) throws Exception;

739

}

740

741

@FunctionalInterface

742

interface Function2<T1, T2, R> extends Serializable {

743

R call(T1 v1, T2 v2) throws Exception;

744

}

745

746

@FunctionalInterface

747

interface VoidFunction<T> extends Serializable {

748

void call(T t) throws Exception;

749

}

750

751

// Specialized transformation interfaces

752

@FunctionalInterface

753

interface MapFunction<T, R> extends Serializable {

754

R call(T value) throws Exception;

755

}

756

757

@FunctionalInterface

758

interface FilterFunction<T> extends Serializable {

759

boolean call(T value) throws Exception;

760

}

761

762

@FunctionalInterface

763

interface FlatMapFunction<T, R> extends Serializable {

764

Iterator<R> call(T t) throws Exception;

765

}

766

767

// Pair operation interfaces

768

@FunctionalInterface

769

interface PairFunction<T, K, V> extends Serializable {

770

Tuple2<K, V> call(T t) throws Exception;

771

}

772

773

// Reduction and aggregation interfaces

774

@FunctionalInterface

775

interface ReduceFunction<T> extends Serializable {

776

T call(T v1, T v2) throws Exception;

777

}

778

779

// Numeric operation interfaces

780

@FunctionalInterface

781

interface DoubleFunction<T> extends Serializable {

782

double call(T t) throws Exception;

783

}

784

785

// Grouping operation interfaces

786

@FunctionalInterface

787

interface MapGroupsFunction<K, V, R> extends Serializable {

788

R call(K key, Iterator<V> values) throws Exception;

789

}

790

```