or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

build-info.md exception-handling.md index.md java-functions.md logging.md storage-management.md utilities.md
tile.json

docs/java-functions.md

Java Functional Interfaces

Comprehensive set of functional interfaces enabling Java integration with Spark's functional programming model through lambda expressions, method references, and anonymous functions.

Capabilities

Core Function Interfaces

Base functional interfaces for transforming and processing data with different arities.

/**
 * Base interface for functions whose return types do not create special RDDs.
 * Declares a single abstract method, so it can be implemented with a lambda
 * expression or a method reference. Extends {@link Serializable}.
 *
 * @param <T1> type of the input value
 * @param <R> type of the result
 */
@FunctionalInterface
public interface Function<T1, R> extends Serializable {
  /**
   * Applies this function to one input value.
   *
   * @param v1 input value
   * @return transformed result
   * @throws Exception if the transformation fails
   */
  R call(T1 v1) throws Exception;
}

/**
 * Function with no arguments that produces a value.
 *
 * @param <R> type of the result
 */
@FunctionalInterface
public interface Function0<R> extends Serializable {
  /**
   * Produces a result without taking any input.
   *
   * @return the produced value
   * @throws Exception if the value cannot be produced
   */
  R call() throws Exception;
}

/**
 * Function with two arguments that produces a value.
 *
 * @param <T1> type of the first argument
 * @param <T2> type of the second argument
 * @param <R> type of the result
 */
@FunctionalInterface  
public interface Function2<T1, T2, R> extends Serializable {
  /**
   * Applies this function to two input values.
   *
   * @param v1 first input value
   * @param v2 second input value
   * @return result computed from both inputs
   * @throws Exception if the computation fails
   */
  R call(T1 v1, T2 v2) throws Exception;
}

/**
 * Function with three arguments that produces a value.
 *
 * @param <T1> type of the first argument
 * @param <T2> type of the second argument
 * @param <T3> type of the third argument
 * @param <R> type of the result
 */
@FunctionalInterface
public interface Function3<T1, T2, T3, R> extends Serializable {
  /**
   * Applies this function to three input values.
   *
   * @param v1 first input value
   * @param v2 second input value
   * @param v3 third input value
   * @return result computed from the three inputs
   * @throws Exception if the computation fails
   */
  R call(T1 v1, T2 v2, T3 v3) throws Exception;
}

/**
 * Function with four arguments that produces a value.
 *
 * @param <T1> type of the first argument
 * @param <T2> type of the second argument
 * @param <T3> type of the third argument
 * @param <T4> type of the fourth argument
 * @param <R> type of the result
 */
@FunctionalInterface
public interface Function4<T1, T2, T3, T4, R> extends Serializable {
  /**
   * Applies this function to four input values.
   *
   * @param v1 first input value
   * @param v2 second input value
   * @param v3 third input value
   * @param v4 fourth input value
   * @return result computed from the four inputs
   * @throws Exception if the computation fails
   */
  R call(T1 v1, T2 v2, T3 v3, T4 v4) throws Exception;
}

Usage Examples:

import org.apache.spark.api.java.function.*;

// Lambda expressions (parameter types written out explicitly)
Function<String, Integer> stringLength = (String text) -> text.length();
Function<Integer, String> intToString = (Integer number) -> number.toString();

// Method references: an unbound instance method and a static method
Function<String, String> toUpperCase = String::toUpperCase;
Function0<Long> currentTime = System::currentTimeMillis;

// Multi-argument functions
Function2<Integer, Integer, Integer> add = (left, right) -> left + right;
Function3<String, String, String, String> concat3 =
  (first, second, third) -> first + second + third;

// Anonymous function classes: the pre-lambda way to implement the interface
Function<Employee, String> getName = new Function<Employee, String>() {
  @Override
  public String call(Employee employee) throws Exception {
    return employee.getName();
  }
};

Dataset and RDD Transformation Interfaces

Specialized interfaces for common Dataset and RDD operations.

/**
 * Base interface for map functions used in Dataset operations.
 * Transforms each element individually: one input value yields one output value.
 *
 * @param <T> type of the input element
 * @param <U> type of the output element
 */
@FunctionalInterface
public interface MapFunction<T, U> extends Serializable {
  /**
   * Transforms one input value into one output value.
   *
   * @param value input value
   * @return transformed output value
   * @throws Exception if the transformation fails
   */
  U call(T value) throws Exception;
}

/**
 * Interface for flat map operations that produce multiple output elements
 * from a single input element.
 *
 * @param <T> type of the input element
 * @param <R> type of the output elements
 */
@FunctionalInterface
public interface FlatMapFunction<T, R> extends Serializable {
  /**
   * Transforms a single input into an iterator of outputs.
   *
   * @param t input value
   * @return iterator over the output values
   * @throws Exception if the transformation fails
   */
  Iterator<R> call(T t) throws Exception;
}

/**
 * Interface for filtering operations: a predicate over a single element.
 *
 * @param <T> type of the element being tested
 */
@FunctionalInterface
public interface FilterFunction<T> extends Serializable {
  /**
   * Tests whether an element should be included in the result.
   *
   * @param value input value to test
   * @return {@code true} if the element should be included
   * @throws Exception if predicate evaluation fails
   */
  boolean call(T value) throws Exception;
}

/**
 * Interface for map partitions operations.
 * Transforms entire partitions rather than individual elements: the function
 * receives an iterator over a partition and returns an iterator of results.
 *
 * @param <T> type of the input elements
 * @param <U> type of the output elements
 */
@FunctionalInterface
public interface MapPartitionsFunction<T, U> extends Serializable {
  /**
   * Transforms a partition of elements.
   *
   * @param input iterator over the partition's elements
   * @return iterator over the transformed elements
   * @throws Exception if the transformation fails
   */
  Iterator<U> call(Iterator<T> input) throws Exception;
}

Usage Examples:

// Map transformations
MapFunction<String, Integer> parseInteger = text -> Integer.parseInt(text);
MapFunction<Person, String> extractName = Person::getName;

// Flat map transformations
FlatMapFunction<String, String> splitWords = line -> {
  String[] words = line.split(" ");
  return Arrays.asList(words).iterator();
};
FlatMapFunction<List<Integer>, Integer> flattenList = List::iterator;

// Filter operations
FilterFunction<Integer> isPositive = number -> number > 0;
FilterFunction<String> isNotEmpty = text -> !text.isEmpty();

// Partition-level operations
MapPartitionsFunction<String, String> processPartition = lines -> {
  // Drain the whole partition into a buffer, upper-casing each line,
  // then hand back an iterator over the buffered results.
  List<String> upperCased = new ArrayList<>();
  lines.forEachRemaining(line -> upperCased.add(line.toUpperCase()));
  return upperCased.iterator();
};

Key-Value Pair Operations

Interfaces for operations that work with key-value pairs and create PairRDDs.

/**
 * Function that returns key-value pairs for constructing PairRDDs.
 *
 * @param <T> type of the input element
 * @param <K> type of the key in the returned pair
 * @param <V> type of the value in the returned pair
 */
@FunctionalInterface
public interface PairFunction<T, K, V> extends Serializable {
  /**
   * Extracts a key-value pair from an input element.
   *
   * @param t input element
   * @return {@code Tuple2} containing the key and the value
   * @throws Exception if extraction fails
   */
  Tuple2<K, V> call(T t) throws Exception;
}

/**
 * Flat map function that produces key-value pairs: one input element yields
 * an iterator of {@code Tuple2} results.
 *
 * @param <T> type of the input element
 * @param <K> type of the keys in the returned pairs
 * @param <V> type of the values in the returned pairs
 */
@FunctionalInterface
public interface PairFlatMapFunction<T, K, V> extends Serializable {
  /**
   * Transforms a single input into multiple key-value pairs.
   *
   * @param t input element
   * @return iterator over the key-value tuples
   * @throws Exception if the transformation fails
   */
  Iterator<Tuple2<K, V>> call(T t) throws Exception;
}

/**
 * Function for flat map operations with two inputs (used in cogroup).
 * NOTE(review): the cogroup association is not demonstrated anywhere in this
 * document; in Spark's Java API this type appears to be the one accepted by
 * zipPartitions - confirm against the Spark version in use.
 *
 * @param <T1> type of the first input
 * @param <T2> type of the second input
 * @param <R> type of the output elements
 */
@FunctionalInterface
public interface FlatMapFunction2<T1, T2, R> extends Serializable {
  /**
   * Transforms two inputs into an iterator of outputs.
   *
   * @param t1 first input
   * @param t2 second input
   * @return iterator over the output values
   * @throws Exception if the transformation fails
   */
  Iterator<R> call(T1 t1, T2 t2) throws Exception;
}

Usage Examples:

import scala.Tuple2;

// Creating key-value pairs
PairFunction<String, String, Integer> wordCount =
  token -> new Tuple2<>(token, 1);

PairFunction<Employee, String, Employee> indexByName =
  employee -> new Tuple2<>(employee.getName(), employee);

// Flat map to pairs: emit (word, 1) for every space-separated token
PairFlatMapFunction<String, String, Integer> wordFrequency = text -> {
  String[] tokens = text.split(" ");
  List<Tuple2<String, Integer>> counts = new ArrayList<>();
  for (int i = 0; i < tokens.length; i++) {
    counts.add(new Tuple2<>(tokens[i], 1));
  }
  return counts.iterator();
};

// Two-input flat map: repeat the string "times" times (nothing when times <= 0)
FlatMapFunction2<String, Integer, String> replicate = (value, times) -> {
  List<String> copies = new ArrayList<>();
  int remaining = times;
  while (remaining > 0) {
    copies.add(value);
    remaining--;
  }
  return copies.iterator();
};

Aggregate and Reduce Operations

Interfaces for aggregation, reduction, and grouping operations.

/**
 * Function for reduce operations: combines two values of the same type
 * into one value of that type.
 *
 * @param <T> type of the values being combined and of the result
 */
@FunctionalInterface
public interface ReduceFunction<T> extends Serializable {
  /**
   * Combines two values into a single value.
   *
   * @param v1 first value
   * @param v2 second value
   * @return combined result
   * @throws Exception if the combination fails
   */
  T call(T v1, T v2) throws Exception;
}

/**
 * Function for cogroup operations combining iterables from two RDDs.
 * NOTE(review): Apache Spark's published CoGroupFunction appears to declare four
 * type parameters (K, V1, V2, R) with
 * {@code call(K key, Iterator<V1> left, Iterator<V2> right)} - confirm this
 * three-parameter, Iterable-based signature against the Spark version in use.
 *
 * @param <V1> type of the values from the first input
 * @param <V2> type of the values from the second input
 * @param <R> type of the output elements
 */
@FunctionalInterface
public interface CoGroupFunction<V1, V2, R> extends Serializable {
  /**
   * Processes a cogroup result given the values from both sides.
   *
   * @param v1 values from the first RDD for this key
   * @param v2 values from the second RDD for this key
   * @return iterator over the output values
   * @throws Exception if processing fails
   */
  Iterator<R> call(Iterable<V1> v1, Iterable<V2> v2) throws Exception;
}

/**
 * Function for map groups operations on grouped data: reduces all values
 * of one group to a single result.
 *
 * @param <K> type of the grouping key
 * @param <V> type of the values within a group
 * @param <R> type of the per-group result
 */
@FunctionalInterface  
public interface MapGroupsFunction<K, V, R> extends Serializable {
  /**
   * Processes all values for a given key.
   *
   * @param key the grouping key
   * @param values iterator over all values for this key
   * @return single result for this group
   * @throws Exception if processing fails
   */
  R call(K key, Iterator<V> values) throws Exception;
}

/**
 * Function for flat map groups operations on grouped data: turns all values
 * of one group into an iterator of results.
 *
 * @param <K> type of the grouping key
 * @param <V> type of the values within a group
 * @param <R> type of the output elements
 */
@FunctionalInterface
public interface FlatMapGroupsFunction<K, V, R> extends Serializable {
  /**
   * Processes all values for a given key, producing multiple outputs.
   *
   * @param key the grouping key
   * @param values iterator over all values for this key
   * @return iterator over the results for this group
   * @throws Exception if processing fails
   */
  Iterator<R> call(K key, Iterator<V> values) throws Exception;
}

Usage Examples:

// Reduce operations
ReduceFunction<Integer> sum = (a, b) -> a + b;
ReduceFunction<String> concatenate = (s1, s2) -> s1 + s2;

// Group processing: adds up every value that shares a key.
// The accumulator is called "total" because a local declared inside a lambda
// body may not reuse the name of an enclosing local variable, and "sum" is
// already declared above; reusing it fails to compile when these examples
// live in the same method.
MapGroupsFunction<String, Integer, Integer> sumByKey = (key, values) -> {
  int total = 0;
  while (values.hasNext()) {
    total += values.next();
  }
  return total;
};

// Collects the non-null e-mail addresses of everyone in a department.
FlatMapGroupsFunction<String, Person, String> extractEmails = (dept, people) -> {
  List<String> emails = new ArrayList<>();
  while (people.hasNext()) {
    Person person = people.next();
    if (person.getEmail() != null) {
      emails.add(person.getEmail());
    }
  }
  return emails.iterator();
};

// Cogroup operations: pairs every number with every string as "str:num".
CoGroupFunction<Integer, String, String> joinData = (numbers, strings) -> {
  List<String> results = new ArrayList<>();
  for (Integer num : numbers) {
    for (String str : strings) {
      results.add(str + ":" + num);
    }
  }
  return results.iterator();
};

Action and Side-Effect Interfaces

Interfaces for actions that produce side effects rather than transformations.

/**
 * Function with void return type for foreach operations.
 *
 * @param <T> type of the input element
 */
@FunctionalInterface
public interface VoidFunction<T> extends Serializable {
  /**
   * Processes an element for its side effects; nothing is returned.
   *
   * @param t input element
   * @throws Exception if processing fails
   */
  void call(T t) throws Exception;
}

/**
 * Function with void return type and two arguments.
 *
 * @param <T1> type of the first input
 * @param <T2> type of the second input
 */
@FunctionalInterface
public interface VoidFunction2<T1, T2> extends Serializable {
  /**
   * Processes two elements for their side effects; nothing is returned.
   *
   * @param v1 first input
   * @param v2 second input
   * @throws Exception if processing fails
   */
  void call(T1 v1, T2 v2) throws Exception;
}

/**
 * Function for foreach operations on individual elements.
 *
 * @param <T> type of the element being processed
 */
@FunctionalInterface
public interface ForeachFunction<T> extends Serializable {
  /**
   * Processes a single element for its side effects; nothing is returned.
   *
   * @param t input element
   * @throws Exception if processing fails
   */
  void call(T t) throws Exception;
}

/**
 * Function for foreach operations on partitions: receives an iterator over
 * a whole partition instead of one element at a time.
 *
 * @param <T> type of the elements in the partition
 */
@FunctionalInterface
public interface ForeachPartitionFunction<T> extends Serializable {
  /**
   * Processes an entire partition for its side effects; nothing is returned.
   *
   * @param t iterator over the partition's elements
   * @throws Exception if processing fails
   */
  void call(Iterator<T> t) throws Exception;
}

Usage Examples:

// Element-wise side effects
// Use a lambda rather than System.out::println: the bound method reference
// captures the PrintStream instance when it is created, and PrintStream is not
// Serializable, so the function cannot be serialized and shipped to executors.
// The lambda looks up System.out only when it runs.
VoidFunction<String> printLine = line -> System.out.println(line);
ForeachFunction<Employee> saveEmployee = emp -> database.save(emp);

// Two-argument side effects
VoidFunction2<String, Integer> logWithCount = (msg, count) ->
  logger.info("Message: {} (count: {})", msg, count);

// Partition-level side effects: save records in batches of 1000
ForeachPartitionFunction<Record> batchSave = partition -> {
  List<Record> batch = new ArrayList<>();
  while (partition.hasNext()) {
    batch.add(partition.next());
    if (batch.size() >= 1000) {
      database.saveBatch(batch);
      batch.clear();
    }
  }
  // Flush whatever is left after the last full batch
  if (!batch.isEmpty()) {
    database.saveBatch(batch);
  }
};

Specialized Type Functions

Functions for creating specialized RDD types like DoubleRDD.

/**
 * Function that returns double values for creating DoubleRDDs.
 * The result is a primitive {@code double}.
 *
 * @param <T> type of the input element
 */
@FunctionalInterface
public interface DoubleFunction<T> extends Serializable {
  /**
   * Extracts a double value from an input element.
   *
   * @param t input element
   * @return the extracted double value
   * @throws Exception if extraction fails
   */
  double call(T t) throws Exception;
}

/**
 * Flat map function that produces double values: one input element yields
 * an iterator of boxed {@code Double} results.
 *
 * @param <T> type of the input element
 */
@FunctionalInterface
public interface DoubleFlatMapFunction<T> extends Serializable {
  /**
   * Transforms a single input into multiple double values.
   *
   * @param t input element
   * @return iterator over the double values
   * @throws Exception if the transformation fails
   */
  Iterator<Double> call(T t) throws Exception;
}

Usage Examples:

// Extract numeric values
DoubleFunction<String> parseDouble = text -> Double.parseDouble(text);
DoubleFunction<Employee> getSalary = Employee::getSalary;

// Generate multiple numeric values from one comma-separated line
DoubleFlatMapFunction<String> parseNumbers = csvLine -> {
  String[] tokens = csvLine.split(",");
  List<Double> parsed = new ArrayList<>();
  for (int i = 0; i < tokens.length; i++) {
    String candidate = tokens[i].trim();
    try {
      parsed.add(Double.parseDouble(candidate));
    } catch (NumberFormatException ignored) {
      // Tokens that are not valid numbers are skipped on purpose
    }
  }
  return parsed.iterator();
};

Function Composition and Patterns

Lambda Expression Patterns

// Simple transformations
Function<String, String> trim = text -> text.trim();
Function<String, Integer> length = text -> text.length();

// Chained method calls inside a lambda
Function<String, String> process = text -> {
  String trimmed = text.trim();
  return trimmed.toLowerCase();
};

// Conditional logic
FilterFunction<Integer> isEven = number -> number % 2 == 0;
Function<Integer, String> classify = number -> {
  if (number > 0) {
    return "positive";
  }
  return "non-positive";
};

// Complex business logic: total an order as the sum of price * quantity
MapFunction<Order, OrderSummary> summarizeOrder = order -> {
  double orderTotal = order.getItems().stream()
    .mapToDouble(lineItem -> lineItem.getPrice() * lineItem.getQuantity())
    .sum();
  return new OrderSummary(order.getId(), orderTotal, order.getCustomerId());
};

Error Handling in Functions

// Exception handling within functions
// Falls back to 0 for any input Integer.parseInt rejects (including null,
// which parseInt reports as a NumberFormatException).
Function<String, Integer> safeParseInt = s -> {
  try {
    return Integer.parseInt(s);
  } catch (NumberFormatException e) {
    return 0; // Default value
  }
};

// Filtering with exception handling
// Double.parseDouble throws NullPointerException (not NumberFormatException)
// for a null argument, so null is rejected up front instead of escaping the
// catch block and failing the whole task.
FilterFunction<String> isValidNumber = s -> {
  if (s == null) {
    return false;
  }
  try {
    Double.parseDouble(s);
    return true;
  } catch (NumberFormatException e) {
    return false;
  }
};