0
# Cluster Provisioning
1
2
Core Service Provider Interface (SPI) for creating, managing, and destroying compute clusters across different cloud platforms and environments. The provisioning system defines standardized interfaces for cluster lifecycle management, with pluggable provisioner implementations that integrate with various infrastructure providers.
3
4
## Capabilities
5
6
### Provisioner Interface
7
8
The main SPI contract for cluster provisioning implementations. Implementers provide concrete provisioning logic for specific platforms (AWS, GCP, Azure, Kubernetes, etc.).
9
10
```java { .api }
11
/**
12
* Main interface for creating and managing clusters
13
* Implementations provide platform-specific provisioning logic
14
*/
15
interface Provisioner {
16
/** Get the specification of this provisioner */
17
ProvisionerSpecification getSpec();
18
19
/** Initialize the provisioner with system context */
20
void initialize(ProvisionerSystemContext context) throws Exception;
21
22
/** Validate provisioner-specific properties (throws IllegalArgumentException if invalid) */
23
void validateProperties(Map<String, String> properties);
24
25
/** Create a new cluster with the given context */
26
Cluster createCluster(ProvisionerContext context) throws Exception;
27
28
/** Get the current status of a cluster */
29
ClusterStatus getClusterStatus(ProvisionerContext context, Cluster cluster) throws Exception;
30
31
/** Get the reason for cluster failure if it is available (only when status is FAILED) */
32
String getClusterFailureMsg(ProvisionerContext context, Cluster cluster) throws Exception;
33
34
/** Get detailed information about a cluster */
35
Cluster getClusterDetail(ProvisionerContext context, Cluster cluster) throws Exception;
36
37
/** Initialize a cluster after it has been created */
38
void initializeCluster(ProvisionerContext context, Cluster cluster) throws Exception;
39
40
/** @deprecated Since 6.2.0. Use deleteClusterWithStatus instead */
41
@Deprecated
42
void deleteCluster(ProvisionerContext context, Cluster cluster) throws Exception;
43
44
/** Delete a cluster and return its status after the delete request */
45
ClusterStatus deleteClusterWithStatus(ProvisionerContext context, Cluster cluster) throws Exception;
46
47
/** Get the polling strategy for checking cluster status */
48
PollingStrategy getPollingStrategy(ProvisionerContext context, Cluster cluster);
49
50
/** Get the capabilities of this provisioner */
51
Capabilities getCapabilities();
52
53
/** Get the runtime job manager for this provisioner (optional) */
54
Optional<RuntimeJobManager> getRuntimeJobManager(ProvisionerContext context);
55
56
/** Get a label describing total processing CPUs from properties */
57
Optional<String> getTotalProcessingCpusLabel(Map<String, String> properties);
58
}
59
```
60
61
**Usage Example:**
62
63
```java
64
public class MyCloudProvisioner implements Provisioner {
65
@Override
66
public ProvisionerSpecification getSpec() {
67
return new ProvisionerSpecification("my-cloud",
68
"My Cloud Provisioner",
69
"Provisions clusters on MyCloud platform");
70
}
71
72
@Override
73
public Cluster createCluster(ProvisionerContext context) throws Exception {
74
Map<String, String> properties = context.getProperties();
75
ProgramRunInfo runInfo = context.getProgramRunInfo();
76
77
// Create cluster using cloud provider APIs
78
String clusterId = createCloudCluster(properties, runInfo);
79
80
return new Cluster(clusterId, ClusterStatus.CREATING,
81
Collections.emptyList(), properties);
82
}
83
84
@Override
85
public ClusterStatus getClusterStatus(ProvisionerContext context, Cluster cluster)
86
throws Exception {
87
// Check cluster status via cloud provider API
88
return checkCloudClusterStatus(cluster.getName());
89
}
90
91
// Other method implementations...
92
}
93
```
94
95
### Provisioner Context
96
97
Runtime context passed to provisioner operations. It exposes program run information, configuration properties, and utility services.
98
99
```java { .api }
100
/**
101
* Context for provisioner operations providing runtime information and services
102
*/
103
interface ProvisionerContext {
104
/** @deprecated Use getProgramRunInfo() instead */
105
@Deprecated
106
ProgramRun getProgramRun();
107
108
/** Get program run information */
109
ProgramRunInfo getProgramRunInfo();
110
111
/** Get provisioner-specific properties */
112
Map<String, String> getProperties();
113
114
/** Get SSH context for remote operations (nullable) */
115
@Nullable SSHContext getSSHContext();
116
117
/** Get Spark compatibility version */
118
SparkCompat getSparkCompat();
119
120
/** @deprecated Use getCDAPVersionInfo() instead */
121
@Deprecated
122
String getCDAPVersion();
123
124
/** Get CDAP version information */
125
VersionInfo getCDAPVersionInfo();
126
127
/** Get application-specific CDAP version information (nullable) */
128
@Nullable VersionInfo getAppCDAPVersionInfo();
129
130
/** Get location factory for file operations */
131
LocationFactory getLocationFactory();
132
133
/** Get runtime monitor type */
134
RuntimeMonitorType getRuntimeMonitorType();
135
136
/** Get a provisioner metrics collector for the given tags */
137
ProvisionerMetrics getMetrics(Map<String, String> tags);
138
139
/** Execute a task asynchronously */
140
<T> CompletionStage<T> execute(Callable<T> callable);
141
142
/** Get profile name (nullable) */
143
@Nullable String getProfileName();
144
145
/** Get error category for exception handling (nullable) */
146
@Nullable ErrorCategory getErrorCategory();
147
}
148
```
149
150
### Provisioner System Context
151
152
System-level context for provisioner initialization and configuration management.
153
154
```java { .api }
155
/**
156
* System-level context for provisioner initialization
157
*/
158
interface ProvisionerSystemContext {
159
/** Get system properties */
160
Map<String, String> getProperties();
161
162
/** Reload properties from system configuration */
163
void reloadProperties();
164
165
/** Get CDAP version (deprecated, use getCDAPVersionInfo) */
166
@Deprecated
167
String getCDAPVersion();
168
169
/** Get a distributed lock with the given name */
170
Lock getLock(String name);
171
}
172
```
173
174
### Cluster Data Types
175
176
Core data structures representing clusters and their components.
177
178
```java { .api }
179
/**
180
* Information about a cluster including nodes and properties
181
*/
182
class Cluster {
183
/** Get cluster name/identifier */
184
String getName();
185
186
/** Get current cluster status */
187
ClusterStatus getStatus();
188
189
/** Get list of cluster nodes */
190
List<Node> getNodes();
191
192
/** Get cluster properties */
193
Map<String, String> getProperties();
194
}
195
196
/**
197
* Information about a cluster node
198
*/
199
class Node {
200
/** Get unique node identifier */
201
String getId();
202
203
/** Get node type */
204
Node.Type getType();
205
206
/** Get node IP address */
207
String getIpAddress();
208
209
/** Get node creation timestamp */
210
long getCreateTime();
211
212
/** Get node-specific properties */
213
Map<String, String> getProperties();
214
215
/** Node types */
216
enum Type {
217
MASTER, // Master/coordinator nodes
218
WORKER, // Worker/compute nodes
219
UNKNOWN // Unknown or unspecified type
220
}
221
}
222
```
223
224
### Cluster Status Management
225
226
Status enumeration and related utilities for cluster state tracking.
227
228
```java { .api }
229
/**
230
* Status values for clusters
231
*/
232
enum ClusterStatus {
233
CREATING, // Cluster is being created
234
RUNNING, // Cluster is running and available
235
FAILED, // Cluster creation or operation failed
236
DELETING, // Cluster is being deleted
237
NOT_EXISTS, // Cluster does not exist
238
ORPHANED // Cluster exists but is not managed
239
}
240
```
241
242
### Provisioner Specification
243
244
Metadata describing a provisioner implementation.
245
246
```java { .api }
247
/**
248
* Specification of a provisioner including name, label, and description
249
*/
250
class ProvisionerSpecification {
251
/** Get provisioner name (unique identifier) */
252
String getName();
253
254
/** Get human-readable label */
255
String getLabel();
256
257
/** Get detailed description */
258
String getDescription();
259
}
260
```
261
262
### Provisioner Capabilities
263
264
Declaration of what dataset types a provisioner can handle.
265
266
```java { .api }
267
/**
268
* Encapsulates provisioner capabilities for specific dataset types
269
*/
270
class Capabilities {
271
/** Get supported dataset types */
272
Set<String> getDatasetTypes();
273
274
/** Check if capabilities are empty */
275
boolean isEmpty();
276
277
/** Empty capabilities constant */
278
static final Capabilities EMPTY;
279
}
280
```
281
282
### Polling Strategies
283
284
Configurable strategies for polling cluster status during operations.
285
286
```java { .api }
287
/**
288
* Defines how to poll for cluster status changes
289
*/
290
interface PollingStrategy {
291
/**
292
* Get next poll interval in milliseconds
293
* @param numPolls Number of polls already performed
294
* @param startTime Time when polling started
295
* @return Milliseconds to wait before next poll, or -1 to stop polling
296
*/
297
long nextPoll(int numPolls, long startTime);
298
}
299
300
/**
301
* Factory for common polling strategies
302
*/
303
class PollingStrategies {
304
/** Create fixed interval polling strategy */
305
static PollingStrategy fixedInterval(long interval, TimeUnit unit);
306
307
/** Add initial delay to existing strategy */
308
static PollingStrategy initialDelay(PollingStrategy strategy, long delay, TimeUnit unit);
309
310
/** Add jittered initial delay to existing strategy */
311
static PollingStrategy initialDelay(PollingStrategy strategy, long minDelay,
312
long maxDelay, TimeUnit unit);
313
}
314
```
315
316
**Usage Example:**
317
318
```java
319
// Fixed 30-second polling
320
PollingStrategy fixedStrategy = PollingStrategies.fixedInterval(30, TimeUnit.SECONDS);
321
322
// Fixed 15-second polling with a 2-minute initial delay
323
PollingStrategy delayedStrategy = PollingStrategies.initialDelay(
324
PollingStrategies.fixedInterval(15, TimeUnit.SECONDS),
325
2, TimeUnit.MINUTES
326
);
327
```
328
329
### Metrics Collection
330
331
Interface for collecting provisioner operation metrics.
332
333
```java { .api }
334
/**
335
* Interface for collecting provisioner metrics
336
*/
337
interface ProvisionerMetrics {
338
/** Increment a counter metric */
339
void count(String metricName, int delta);
340
341
/** Set a gauge metric value */
342
void gauge(String metricName, long value);
343
}
344
```
345
346
### Exception Types
347
348
Specialized exceptions for provisioner operations.
349
350
```java { .api }
351
/**
352
* Exception indicating a retryable provisioning failure
353
*/
354
class RetryableProvisionException extends Exception implements ErrorTagProvider {
355
/** Get error tags for categorization */
356
Set<String> getErrorTags();
357
}
358
```
359
360
### Cluster Properties Constants
361
362
Common property keys used in cluster configuration.
363
364
```java { .api }
365
/**
366
* Common constants for cluster properties
367
*/
368
final class ClusterProperties {
369
/** Kerberos principal property key */
370
static final String KERBEROS_PRINCIPAL = "kerberos.principal";
371
372
/** Kerberos keytab property key */
373
static final String KERBEROS_KEYTAB = "kerberos.keytab";
374
}
375
```
376
377
### Deprecated Classes
378
379
Legacy classes maintained for backward compatibility.
380
381
```java { .api }
382
/**
383
* @deprecated since 6.2.0. Use ProgramRunInfo instead
384
*/
385
@Deprecated
386
class ProgramRun {
387
String getNamespace();
388
String getApplication();
389
String getProgram();
390
String getRun();
391
}
392
```