Core application programming interface for the Cask Data Application Platform (CDAP), enabling development of scalable data processing applications on the Hadoop ecosystem.

---

The Application Framework provides the foundation for building CDAP applications, offering base classes, configuration interfaces, and lifecycle management for complete data processing solutions.
```java
public interface Application<T extends Config> {
  void configure(ApplicationConfigurer configurer, ApplicationContext<T> context);
}
```

Base interface for all CDAP applications. Implementations define their structure (programs, datasets, and services) in the configure(ApplicationConfigurer, ApplicationContext) method, using the configurer to register components and the context to access configuration information.
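An application can also implement this interface directly instead of extending AbstractApplication below; a minimal sketch, where StatusService stands in for any Service implementation:

```java
public class BareApplication implements Application<Config> {
  @Override
  public void configure(ApplicationConfigurer configurer, ApplicationContext<Config> context) {
    configurer.setName("BareApp");
    configurer.setDescription("Configured without the AbstractApplication helpers");
    // Register a dataset by type name and attach a service.
    configurer.createDataset("state", KeyValueTable.class.getName());
    configurer.addService(new StatusService()); // StatusService is hypothetical
  }
}
```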
```java
public abstract class AbstractApplication<T extends Config> implements Application<T> {
  public abstract void configure();

  // Configuration access
  protected final T getConfig();

  // Application metadata
  protected final void setName(String name);
  protected final void setDescription(String description);

  // Program management
  protected final void addMapReduce(MapReduce mapReduce);
  protected final void addSpark(Spark spark);
  protected final void addWorkflow(Workflow workflow);
  protected final void addService(Service service);
  protected final void addWorker(Worker worker);

  // Dataset management
  protected final void addDatasetModule(String moduleName, Class<? extends DatasetModule> moduleClass);
  protected final void createDataset(String datasetName, String datasetType);
  protected final void createDataset(String datasetName, String datasetType, DatasetProperties properties);
  protected final void createDataset(String datasetName, Class<? extends Dataset> datasetClass);

  // Plugin usage
  protected final void usePlugin(String pluginType, String pluginName, String pluginId, PluginProperties properties);

  // Scheduling
  protected final void scheduleWorkflow(Schedule schedule, String workflowName);
}
```

Base implementation class for CDAP applications. Provides protected helper methods for configuring programs, datasets, plugins, and schedules; subclasses implement the no-argument configure() method.
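The plugin and dataset-module helpers are not exercised by the usage examples at the end of this section; a minimal sketch of plugin registration, where the plugin type ("transform"), name ("uppercase"), and property key are hypothetical values that a deployed plugin artifact would need to match:

```java
public class PluginApp extends AbstractApplication<Config> {
  @Override
  public void configure() {
    setName("PluginApp");
    // Register a plugin under the id "upper1" for later lookup via PluginContext.
    usePlugin("transform", "uppercase", "upper1",
        PluginProperties.builder().add("field", "name").build());
  }
}
```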
```java
public class SingleRunnableApplication<T extends Config> extends AbstractApplication<T> {
  public SingleRunnableApplication(ProgramLifecycle<? extends RuntimeContext> program);

  @Override
  public void configure();
}
```

Simplified application for single-program use cases.
```java
public interface ApplicationConfigurer extends DatasetConfigurer, PluginConfigurer {
  void setName(String name);
  void setDescription(String description);
  void addMapReduce(MapReduce mapReduce);
  void addSpark(Spark spark);
  void addWorkflow(Workflow workflow);
  void addService(Service service);
  void addWorker(Worker worker);
  void scheduleWorkflow(Schedule schedule, String workflowName);
}
```

Interface for configuring applications during the configure phase.
```java
public interface ProgramConfigurer {
  void setName(String name);
  void setDescription(String description);
  void setProperties(Map<String, String> properties);
  void setResources(Resources resources);
}
```

Base interface for configuring individual programs.
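Programs call these setters from their own configure step; a sketch using a worker, assuming an AbstractWorker base class (from the worker package) that exposes the same setters:

```java
public class PollingWorker extends AbstractWorker {
  @Override
  public void configure() {
    setName("PollingWorker");
    setDescription("Polls an external endpoint on a fixed interval");
    setProperties(Collections.singletonMap("poll.interval.seconds", "60")); // illustrative key
    setResources(new Resources(512, 1)); // 512 MB, 1 virtual core
  }

  @Override
  public void run() {
    // Worker body: poll until stopped.
  }
}
```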
```java
public interface DatasetConfigurer {
  void addDatasetModule(String moduleName, Class<? extends DatasetModule> moduleClass);
  void createDataset(String datasetName, String datasetType);
  void createDataset(String datasetName, String datasetType, DatasetProperties properties);
  void useDataset(String datasetName);
}
```

Interface for dataset configuration within applications and programs.
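A sketch of the three-argument createDataset overload; DatasetProperties.builder() is the usual construction path, and both the "table" type name and the TTL property key are assumptions rather than values documented above:

```java
public class DatasetApp extends AbstractApplication<Config> {
  @Override
  public void configure() {
    setName("DatasetApp");
    // Create a Table-type dataset whose entries expire after one day.
    createDataset("events", "table",
        DatasetProperties.builder().add("dataset.table.ttl", "86400").build());
  }
}
```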
```java
public interface ApplicationContext<T extends Config> {
  T getConfig();
  ApplicationSpecification getApplicationSpecification();
  String getNamespace();
}
```

Context available to applications during the configure phase, providing access to the typed application configuration.
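At deployment time the platform deserializes the JSON configuration supplied with the application into the declared Config subtype, which getConfig() then returns. A sketch reading it through the context (CustomConfig is defined in the examples at the end of this section):

```java
public class ContextDrivenApp implements Application<CustomConfig> {
  @Override
  public void configure(ApplicationConfigurer configurer, ApplicationContext<CustomConfig> context) {
    CustomConfig config = context.getConfig();
    configurer.setName("ContextDrivenApp");
    configurer.createDataset(config.getInputDataset(), KeyValueTable.class.getName());
  }
}
```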
```java
public class ApplicationSpecification {
  public String getName();
  public String getDescription();
  public String getConfiguration();
  public ArtifactId getArtifactId();
  public Map<String, MapReduceSpecification> getMapReduce();
  public Map<String, SparkSpecification> getSpark();
  public Map<String, WorkflowSpecification> getWorkflows();
  public Map<String, ServiceSpecification> getServices();
  public Map<String, WorkerSpecification> getWorkers();
  public Map<String, DatasetCreationSpec> getDatasets();
  public Map<String, Plugin> getPlugins();
}
```

Complete specification of an application, including all of its components and configurations.
```java
public interface ProgramLifecycle<T extends RuntimeContext> {
  void initialize(T context) throws Exception;
  void destroy();
}
```

Lifecycle interface for all program types, providing initialization and cleanup hooks.
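A sketch of the acquire-and-release pattern these hooks support; the JDBC connection and argument name are illustrative, and a concrete program would receive its specific RuntimeContext subtype:

```java
public class JdbcLifecycle implements ProgramLifecycle<RuntimeContext> {
  private Connection connection;

  @Override
  public void initialize(RuntimeContext context) throws Exception {
    // "jdbc.url" is an illustrative runtime argument name.
    connection = DriverManager.getConnection(context.getRuntimeArguments().get("jdbc.url"));
  }

  @Override
  public void destroy() {
    try {
      connection.close();
    } catch (SQLException e) {
      // Best-effort cleanup on shutdown; log rather than fail.
    }
  }
}
```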
```java
public interface RuntimeContext {
  String getNamespace();
  String getApplicationName();
  String getProgramName();
  ProgramType getProgramType();
  String getRunId();
  Map<String, String> getRuntimeArguments();
  Metrics getMetrics();
  PluginContext getPluginContext();
  ServiceDiscoverer getServiceDiscoverer();
  Admin getAdmin();
}
```

Base runtime context available to all programs during execution.
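A sketch of the context facilities a program commonly touches at startup; the "batch.size" argument is illustrative, and the service name matches the CustomerDataService added in the examples below:

```java
public class ContextAwareProgram implements ProgramLifecycle<RuntimeContext> {
  private Metrics metrics;

  @Override
  public void initialize(RuntimeContext context) throws Exception {
    // Runtime arguments arrive as strings.
    int batchSize = Integer.parseInt(
        context.getRuntimeArguments().getOrDefault("batch.size", "100"));
    metrics = context.getMetrics();
    metrics.gauge("batch.size", batchSize);

    // Resolve the HTTP endpoint of a service in the same application.
    URL serviceUrl = context.getServiceDiscoverer().getServiceURL("CustomerDataService");
  }

  @Override
  public void destroy() {
    metrics.count("program.shutdowns", 1);
  }
}
```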
```java
public class Config {
  // Base configuration class for all configurable components.
  // Extend it to add custom configuration properties.
}
```

Base class for application and plugin configurations.
```java
public class Resources {
  public Resources(int memoryMB);
  public Resources(int memoryMB, int virtualCores);
  public int getMemoryMB();
  public int getVirtualCores();
}
```

Resource specification (memory in megabytes and virtual cores) for programs and containers.
```java
public class MyApplication extends AbstractApplication<Config> {
  @Override
  public void configure() {
    setName("MyDataProcessingApp");
    setDescription("Processes customer data using MapReduce and Spark");

    // Create datasets
    createDataset("customerData", KeyValueTable.class);
    createDataset("processedData", TimePartitionedFileSet.class);

    // Add programs
    addMapReduce(new CustomerDataProcessor());
    addSpark(new CustomerAnalytics());
    addService(new CustomerDataService());

    // Schedule the workflow daily at 2 AM
    scheduleWorkflow(Schedules.builder("DailyProcessing")
        .setDescription("Daily customer data processing")
        .createTimeSchedule("0 2 * * *"), "DataProcessingWorkflow");
  }
}
```

```java
public class CustomConfig extends Config {
  @Description("Input dataset name")
  private String inputDataset = "input";

  @Description("Processing batch size")
  private int batchSize = 1000;

  // Getters
  public String getInputDataset() { return inputDataset; }
  public int getBatchSize() { return batchSize; }
}

public class ConfigurableApplication extends AbstractApplication<CustomConfig> {
  @Override
  public void configure() {
    CustomConfig config = getConfig();
    setName("ConfigurableApp");
    createDataset(config.getInputDataset(), KeyValueTable.class);

    // Use configuration in program setup
    addMapReduce(new ConfigurableProcessor(config.getBatchSize()));
  }
}
```

```java
public class SimpleWorkerApp extends SingleRunnableApplication<Config> {
  public SimpleWorkerApp() {
    super(new SimpleDataWorker());
  }
}
```

Install with Tessl CLI:

```
npx tessl i tessl/maven-co-cask-cdap--cdap-api
```