Core application programming interface for the Cask Data Application Platform enabling development of scalable data processing applications on Hadoop ecosystems.
npx @tessl/cli install tessl/maven-co-cask-cdap--cdap-api@5.1.2

The CDAP API provides a comprehensive set of Java interfaces and abstractions for developing applications on the Cask Data Application Platform (CDAP). CDAP is a unified data platform built on Apache Hadoop that enables developers to create scalable data applications, workflows, services, and batch/real-time processing programs without dealing directly with the complexity of the underlying Hadoop infrastructure.
co.cask.cdap:cdap-api:5.1.2

pom.xml:

<dependency>
<groupId>co.cask.cdap</groupId>
<artifactId>cdap-api</artifactId>
<version>5.1.2</version>
</dependency>

import co.cask.cdap.api.app.Application;
import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.app.ApplicationConfigurer;
import co.cask.cdap.api.Config;
import co.cask.cdap.api.annotation.UseDataSet;
import co.cask.cdap.api.dataset.Dataset;

import co.cask.cdap.api.app.Application;
import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.app.ApplicationConfigurer;
import co.cask.cdap.api.Config;
/**
 * Sample CDAP application.
 *
 * Fixed: AbstractApplication declares configure(ApplicationConfigurer,
 * ApplicationContext) as final (see its API sketch in this document), so a
 * subclass cannot override it. Subclasses instead override the protected
 * no-arg configure() hook and use the inherited helper methods
 * (setName, setDescription, addMapReduce, addService, ...).
 */
public class MyApplication extends AbstractApplication<Config> {
@Override
protected void configure() {
setName("MyDataApp");
setDescription("A sample CDAP application");
// Add datasets, programs, services, etc.
addMapReduce(new MyMapReduceJob());
addService(new MyService());
}
}

The CDAP API is organized around several key architectural concepts:
Core interfaces and classes for building CDAP applications with configuration, lifecycle management, and program organization.
/**
 * Root interface implemented by every CDAP application. configure() is called
 * once at deployment time to declare the application's programs and metadata.
 *
 * @param <T> the application's configuration type
 */
public interface Application<T extends Config> {
// Register programs, datasets, services, etc. through the configurer.
void configure(ApplicationConfigurer configurer, ApplicationContext<T> context);
}
/**
 * Convenience base class for applications: the two-arg configure(...) is final
 * and delegates to the no-arg configure() hook that subclasses implement,
 * using the protected helpers below.
 *
 * @param <T> the application's configuration type
 */
public abstract class AbstractApplication<T extends Config> implements Application<T> {
// Final template method — subclasses cannot override this form.
public final void configure(ApplicationConfigurer configurer, ApplicationContext<T> context);
// Subclasses declare their programs and metadata here.
protected abstract void configure();
protected final void setName(String name);
protected final void setDescription(String description);
}
/**
 * Collects an application's metadata, programs, and schedules during
 * configure(). Extends DatasetConfigurer and PluginConfigurer, so datasets
 * and plugins are registered through this same object.
 */
public interface ApplicationConfigurer extends DatasetConfigurer, PluginConfigurer {
void setName(String name);
void setDescription(String description);
// One add* call per program of the given type.
void addMapReduce(MapReduce mapReduce);
void addSpark(Spark spark);
void addWorkflow(Workflow workflow);
void addService(Service service);
void addWorker(Worker worker);
// Begins a schedule definition for the named program; completed via the builder.
ScheduleBuilder buildSchedule(String scheduleName, ProgramType programType, String programName);
TriggerFactory getTriggerFactory();
}

Support for various program types including MapReduce, Spark, Workflow orchestration, HTTP services, and background workers.
/** A MapReduce program; configure() declares its name, datasets, and resources. */
public interface MapReduce {
void configure(MapReduceConfigurer configurer);
}
/** A Spark program; configure() declares its name, datasets, and resources. */
public interface Spark {
void configure(SparkConfigurer configurer);
}
/** A Workflow program that orchestrates other programs; defined via configure(). */
public interface Workflow {
void configure(WorkflowConfigurer configurer);
}

Comprehensive dataset APIs with built-in types (key-value, indexed tables, file sets) and support for custom dataset implementations.
/**
 * Base interface for all CDAP datasets. Extends Closeable so instances can
 * release their underlying resources when closed.
 */
public interface Dataset extends Closeable {
// Base dataset interface
}
/**
 * Defines a dataset type: its name and how to instantiate dataset instances.
 *
 * @param <D> the dataset type produced
 * @param <A> the admin type used to manage the dataset's lifecycle
 */
public interface DatasetDefinition<D extends Dataset, A extends DatasetAdmin> {
String getName();
// Instantiates the dataset from its specification and runtime arguments.
D getDataset(DatasetContext datasetContext, DatasetSpecification spec,
Map<String, String> arguments, ClassLoader classLoader);
}

Extensible plugin architecture for adding custom processing logic, data sources, sinks, and transformations.
/** Base class plugins extend to receive their declared configuration fields. */
public class PluginConfig {
// Base plugin configuration
}
/** Runtime access to registered plugins, looked up by plugin id. */
public interface PluginContext {
// Creates a new, configured instance of the plugin registered under pluginId.
<T> T newPluginInstance(String pluginId);
// Loads the plugin's class without instantiating it.
<T> Class<T> loadPluginClass(String pluginId);
}
// Example plugin: the @Plugin annotation declares the plugin type ("source").
@Plugin(type = "source")
public class MySourcePlugin extends PluginConfig {
// Custom plugin implementation
}

Flexible scheduling system with time-based triggers, program status triggers, and partition-based triggers for automated program execution.
/** Fluent builder for program schedules, created from a name and a trigger. */
public class ScheduleBuilder {
public static ScheduleBuilder create(String name, Trigger trigger);
public ScheduleBuilder setDescription(String description);
public ScheduleBuilder setProperties(Map<String, String> properties);
}
/** Marker interface for schedule triggers (time, program-status, partition). */
public interface Trigger {
// Base trigger interface
}

Built-in support for ACID transactions across datasets with declarative transaction control and programmatic transaction management.
/** Executes the supplied work inside a single transaction. */
public interface Transactional {
void execute(TxRunnable runnable);
// Variant that returns the callable's result; still runs in one transaction.
<T> T execute(Callable<T> callable);
}
// Example: EXPLICIT opts out of framework-managed transactions — the program
// controls transaction boundaries itself.
@TransactionPolicy(TransactionControl.EXPLICIT)
public class MyProgram {
// Explicit transaction control
}

Rich annotation-based configuration system for dependency injection, transaction control, data access patterns, and plugin metadata.
// Flowlet dataset injection: @UseDataSet injects the named dataset into the field.
@UseDataSet("myDataset")
private ObjectStore<Data> dataStore; // In Flowlet context
// @Property exposes the field as a configurable property; @Description documents it.
@Property
@Description("Configuration property description")
private String configValue;
// Example: IMPLICIT lets the framework wrap program methods in transactions.
@TransactionPolicy(TransactionControl.IMPLICIT)
public class MyTransactionalProgram {
// Implicit transaction handling
}

Integration with CDAP system services including metrics collection, service discovery, administrative operations, and artifact management.
/** Emits user metrics: count() adds a delta; gauge() records an absolute value. */
public interface Metrics {
void count(String metricName, int delta);
void gauge(String metricName, long value);
}
/** Looks up other CDAP services by name for service-to-service communication. */
public interface ServiceDiscoverer {
Discoverable discover(String serviceName);
}

public class Config {
// Base configuration class for all configurable components
}
/** The kinds of programs a CDAP application can contain. */
public enum ProgramType {
FLOW, MAPREDUCE, WORKFLOW, SERVICE, SPARK, WORKER
}
/** Identifies the running program: its namespace, application, type, and name. */
public interface RuntimeContext {
String getNamespace();
String getApplicationName();
ProgramType getProgramType();
String getProgramName();
}
/**
 * Lifecycle hooks for programs: initialize(context) before execution,
 * destroy() afterwards.
 *
 * @param <T> the runtime context type passed to initialize
 */
public interface ProgramLifecycle<T extends RuntimeContext> {
void initialize(T context);
void destroy();
}
/**
 * Resource requirements for a program container: memory in MB and virtual cores.
 */
public class Resources {
private final int virtualCores;
private final int memoryMB;
// Single-arg form presumably applies a default virtual-core count — TODO confirm.
public Resources(int memoryMB);
public Resources(int memoryMB, int virtualCores);
}