0
# Cluster Management
1
2
The Elasticsearch test framework provides sophisticated cluster management capabilities through the `InternalTestCluster` class and related utilities. This enables testing of multi-node scenarios, cluster state management, node lifecycle operations, and distributed system behaviors.
3
4
## InternalTestCluster
5
6
`InternalTestCluster` is the core cluster management implementation: it runs multi-node test clusters and manages the full lifecycle of their nodes.
7
8
```{ .api }
9
package org.elasticsearch.test;
10
11
import org.elasticsearch.client.internal.Client;
12
import org.elasticsearch.cluster.ClusterState;
13
import org.elasticsearch.cluster.node.DiscoveryNode;
14
import org.elasticsearch.cluster.service.ClusterService;
15
import org.elasticsearch.common.settings.Settings;
16
import org.elasticsearch.core.TimeValue;
17
import org.elasticsearch.node.Node;
18
import org.elasticsearch.plugins.Plugin;
19
20
/**
21
* Internal implementation of TestCluster providing comprehensive multi-node cluster management.
22
* Handles node startup, shutdown, configuration, and cluster state management for integration testing.
23
*/
24
public final class InternalTestCluster extends TestCluster {
25
26
/**
27
* Starts a new data node with random configuration.
28
*
29
* @return name of the started node
30
*/
31
public String startNode();
32
33
/**
34
* Starts a new node with the specified settings.
35
*
36
* @param settings node configuration settings
37
* @return name of the started node
38
*/
39
public String startNode(Settings settings);
40
41
/**
42
* Starts a data node with custom configuration.
43
*
44
* @param settings node settings
45
* @return name of the started node
46
*/
47
public String startDataOnlyNode(Settings settings);
49
50
/**
51
* Starts a dedicated master-only node (master-eligible, no data role).
52
*
53
* @param settings node configuration
54
* @return name of the started master node
55
*/
56
public String startMasterOnlyNode(Settings settings);
57
58
/**
59
* Starts a coordinating-only node (no data, no master).
60
*
61
* @param settings node configuration
62
* @return name of the started coordinating node
63
*/
64
public String startCoordinatingOnlyNode(Settings settings);
65
66
/**
67
* Stops a randomly selected node gracefully.
67
*
68
* Takes no arguments: the node to stop is chosen at random.
70
* @return true if node was stopped successfully
71
*/
72
public boolean stopRandomNode();
73
74
/**
75
* Stops a specific node.
76
*
77
* @param node name of the node to stop
78
*/
79
public void stopNode(String node);
80
81
/**
82
* Restarts the named node: the node is stopped, the callback is invoked, and the node is started again.
83
*
84
* @param node node name to restart
85
* @param callback optional callback executed during restart
86
* @return name of the restarted node
87
*/
88
public String restartNode(String node, InternalTestCluster.RestartCallback callback);
89
90
/**
91
* Restarts a random node in the cluster.
92
*
93
* @param callback optional restart callback
94
* @return name of the restarted node
95
*/
96
public String restartRandomNode(InternalTestCluster.RestartCallback callback);
97
98
/**
99
* Restarts all nodes in the cluster.
100
*
101
* @param callback callback executed for each node restart
102
*/
103
public void restartAllNodes(InternalTestCluster.RestartCallback callback);
104
105
/**
106
* Ensures the cluster has at least the specified number of data nodes.
107
* Starts additional nodes if necessary.
108
*
109
* @param n minimum number of data nodes required
110
*/
111
public void ensureAtLeastNumDataNodes(int n);
112
113
/**
114
* Ensures the cluster has at most the specified number of data nodes.
115
* Stops excess nodes if necessary.
116
*
117
* @param n maximum number of data nodes allowed
118
*/
119
public void ensureAtMostNumDataNodes(int n);
120
121
/**
122
* Returns a client connected to a specific node.
123
*
124
* @param nodeName name of the node
125
* @return client for the specified node
126
*/
127
public Client client(String nodeName);
128
129
/**
130
* Returns a client connected to a random node.
131
*
132
* @return client for a random node
133
*/
134
public Client client();
135
136
/**
137
* Returns a client that performs operations on the master node.
138
*
139
* @return master client
140
*/
141
public Client masterClient();
142
143
/**
144
* Returns a client connected to a non-master node.
145
*
146
* @return non-master client
147
*/
148
public Client nonMasterClient();
149
150
/**
151
* Returns a client connected to a data node.
152
*
153
* @return data node client
154
*/
155
public Client dataNodeClient();
156
157
/**
158
* Gets the current master node.
159
*
160
* @return name of the master node
161
*/
162
public String getMasterName();
163
164
/**
165
* Gets all node names in the cluster.
166
*
167
* @return set of all node names
168
*/
169
public Set<String> getNodeNames();
170
171
/**
172
* Gets names of all data nodes.
173
*
174
* @return set of data node names
175
*/
176
public Set<String> getDataNodeNames();
177
178
/**
179
* Gets names of all master-eligible nodes.
180
*
181
* @return set of master node names
182
*/
183
public Set<String> getMasterEligibleNodeNames();
184
185
/**
186
* Returns the ClusterService for a specific node.
187
*
188
* @param node node name
189
* @return ClusterService instance
190
*/
191
public ClusterService clusterService(String node);
192
193
/**
194
* Returns the current cluster state.
195
*
196
* @return current ClusterState
197
*/
198
public ClusterState clusterState();
199
200
/**
201
* Waits for the cluster to reach the specified state.
202
*
203
* @param statePredicate predicate to match desired state
204
* @param timeout maximum time to wait
205
*/
206
public void waitForState(Predicate<ClusterState> statePredicate, TimeValue timeout);
207
208
/**
209
* Waits for all nodes to agree on the cluster state.
210
*
211
* @param timeout maximum time to wait
212
*/
213
public void waitForConsensus(TimeValue timeout);
214
215
/**
216
* Forces a master election by stopping the current master.
217
*/
218
public void forceMasterElection();
219
220
/**
221
* Validates that the cluster is properly formed and healthy.
222
*/
223
public void validateClusterFormed();
224
225
/**
226
* Interface for callbacks executed during node restarts.
227
*/
228
public static interface RestartCallback {
229
/**
230
* Called once the node has been stopped, before it is started again.
230
*
231
* @param nodeName name of the node that was stopped
233
* @return modified settings for restart
234
*/
235
Settings onNodeStopped(String nodeName);
236
237
/**
238
* Called before the node is restarted; return true to clear the node's data first, false to keep it.
239
*
240
* @param nodeName name of the node being restarted
241
*/
242
boolean clearData(String nodeName);
243
}
244
}
245
```
246
247
### InternalTestCluster Usage Examples
248
249
```java
250
import org.elasticsearch.test.ESIntegTestCase;
251
import org.elasticsearch.test.InternalTestCluster;
252
253
public class ClusterManagementTest extends ESIntegTestCase {
254
255
public void testMultiNodeCluster() {
256
InternalTestCluster cluster = internalCluster();
257
258
// Start with minimum nodes
259
cluster.ensureAtLeastNumDataNodes(3);
260
261
// Verify cluster formation
262
cluster.validateClusterFormed();
263
264
assertThat(cluster.size(), greaterThanOrEqualTo(3));
265
assertThat(cluster.numDataNodes(), greaterThanOrEqualTo(3));
266
}
267
268
public void testNodeLifecycle() {
269
InternalTestCluster cluster = internalCluster();
270
271
// Start additional nodes
272
String dataNode = cluster.startDataOnlyNode(Settings.EMPTY);
273
String masterNode = cluster.startMasterOnlyNode(Settings.EMPTY);
274
String coordinatingNode = cluster.startCoordinatingOnlyNode(Settings.EMPTY);
275
276
// Verify nodes are running
277
assertTrue(cluster.getNodeNames().contains(dataNode));
278
assertTrue(cluster.getNodeNames().contains(masterNode));
279
assertTrue(cluster.getNodeNames().contains(coordinatingNode));
280
281
// Test node restart
282
cluster.restartNode(dataNode, new InternalTestCluster.RestartCallback() {
283
@Override
284
public Settings onNodeStopped(String nodeName) {
285
return Settings.builder()
286
.put("node.attr.restarted", true)
287
.build();
288
}
289
290
@Override
291
public boolean clearData(String nodeName) {
292
return false; // Keep data during restart
293
}
294
});
295
296
// Verify node restarted successfully
297
assertTrue(cluster.getNodeNames().contains(dataNode));
298
}
299
300
public void testMasterElection() {
301
InternalTestCluster cluster = internalCluster();
302
303
// Make sure at least three data nodes are running before triggering an election
304
cluster.ensureAtLeastNumDataNodes(3);
305
306
String originalMaster = cluster.getMasterName();
307
assertThat(originalMaster, notNullValue());
308
309
// Force master election
310
cluster.forceMasterElection();
311
312
// Wait for new master
313
cluster.waitForState(state ->
314
state.nodes().getMasterNode() != null,
315
TimeValue.timeValueSeconds(30)
316
);
317
318
String newMaster = cluster.getMasterName();
319
assertThat(newMaster, notNullValue());
320
}
321
322
public void testClientTypes() {
323
InternalTestCluster cluster = internalCluster();
324
cluster.ensureAtLeastNumDataNodes(2);
325
326
// Test different client types
327
Client masterClient = cluster.masterClient();
328
Client dataClient = cluster.dataNodeClient();
329
Client randomClient = cluster.client();
330
331
// All clients should be able to perform operations
332
masterClient.admin().cluster().prepareHealth().get();
333
dataClient.admin().cluster().prepareHealth().get();
334
randomClient.admin().cluster().prepareHealth().get();
335
}
336
}
337
```
338
339
## NodeConfigurationSource
340
341
Interface for providing custom node configurations during cluster setup.
342
343
```{ .api }
344
package org.elasticsearch.test;
345
346
import org.elasticsearch.common.settings.Settings;
347
import org.elasticsearch.plugins.Plugin;
348
349
/**
350
* Interface for customizing node configuration in test clusters.
351
* Allows injection of custom settings, plugins, and node-specific configuration.
352
*/
353
public interface NodeConfigurationSource {
354
355
/**
356
* Returns the settings to apply to the node with the given ordinal.
357
*
358
* @param nodeOrdinal ordinal number of the node being configured
359
* @return settings for the node
360
*/
361
Settings nodeSettings(int nodeOrdinal);
362
363
/**
364
* Returns plugins that should be installed on cluster nodes.
365
*
366
* @return collection of plugin classes
367
*/
368
Collection<Class<? extends Plugin>> nodePlugins();
369
370
/**
371
* Returns settings that should be applied to transport clients.
372
*
373
* @return transport client settings
374
*/
375
Settings transportClientSettings();
376
}
377
```
378
379
## ClusterService Access
380
381
Utilities for accessing and manipulating cluster services in tests.
382
383
```{ .api }
384
package org.elasticsearch.test;
385
386
import org.elasticsearch.cluster.service.ClusterService;
387
import org.elasticsearch.cluster.ClusterState;
388
import org.elasticsearch.cluster.ClusterStateUpdateTask;
389
390
/**
391
* Utilities for working with ClusterService in tests.
392
*/
393
public class ClusterServiceUtils {
394
395
/**
396
* Sets a new cluster state directly (bypassing normal update mechanisms).
397
* Use with caution - primarily for testing specific cluster states.
398
*
399
* @param clusterService target cluster service
400
* @param clusterState new cluster state to set
401
*/
402
public static void setState(ClusterService clusterService, ClusterState clusterState);
403
404
/**
405
* Adds a high priority cluster state update task.
406
*
407
* @param clusterService target cluster service
408
* @param reason reason for the update
409
* @param task update task to execute
410
*/
411
public static void submitStateUpdateTask(ClusterService clusterService,
412
String reason,
413
ClusterStateUpdateTask task);
414
415
/**
416
* Creates a ClusterService instance for testing with the specified initial state.
417
*
418
* @param initialState initial cluster state
419
* @return new ClusterService instance
420
*/
421
public static ClusterService createClusterService(ClusterState initialState);
422
423
/**
424
* Creates a minimal cluster state for testing.
425
*
426
* @param clusterName name of the test cluster
427
* @return basic cluster state
428
*/
429
public static ClusterState createClusterState(String clusterName);
430
}
431
```
432
433
## Discovery and Node Management
434
435
Utilities for managing node discovery and cluster formation in tests.
436
437
```{ .api }
438
package org.elasticsearch.cluster.node;
439
440
import org.elasticsearch.cluster.node.DiscoveryNode;
441
import org.elasticsearch.common.transport.TransportAddress;
442
443
/**
444
* Utilities for creating and managing discovery nodes in tests.
445
*/
446
public class DiscoveryNodeUtils {
447
448
/**
449
* Creates a new discovery node with random configuration.
450
*
451
* @param id node identifier
452
* @return configured DiscoveryNode
453
*/
454
public static DiscoveryNode create(String id);
455
456
/**
457
* Creates a discovery node with specific roles.
458
*
459
* @param id node identifier
460
* @param address transport address
461
* @param roles set of node roles
462
* @return configured DiscoveryNode
463
*/
464
public static DiscoveryNode create(String id,
465
TransportAddress address,
466
Set<DiscoveryNodeRole> roles);
467
468
/**
469
* Creates a master-eligible node.
470
*
471
* @param id node identifier
472
* @return master-eligible DiscoveryNode
473
*/
474
public static DiscoveryNode createMasterNode(String id);
475
476
/**
477
* Creates a data-only node.
478
*
479
* @param id node identifier
480
* @return data-only DiscoveryNode
481
*/
482
public static DiscoveryNode createDataNode(String id);
483
484
/**
485
* Creates a coordinating-only node.
486
*
487
* @param id node identifier
488
* @return coordinating-only DiscoveryNode
489
*/
490
public static DiscoveryNode createCoordinatingNode(String id);
491
492
/**
493
* Generates a random transport address for testing.
494
*
495
* @return random TransportAddress
496
*/
497
public static TransportAddress randomAddress();
498
}
499
```
500
501
## Cluster Scope Configuration
502
503
Advanced cluster configuration options for fine-tuning test cluster behavior.
504
505
```{ .api }
506
package org.elasticsearch.test;
507
508
/**
509
* Configuration options for test cluster scope and behavior.
510
*/
511
public class ClusterScope {
512
513
/**
514
* Defines when cluster instances are created and destroyed.
515
*/
516
public enum Scope {
517
/** Single cluster instance for entire test suite */
518
SUITE,
519
/** New cluster instance for each test method */
520
TEST
521
}
522
523
/**
524
* Transport client usage ratio for testing client behavior.
525
* 0.0 = always use node clients, 1.0 = always use transport clients
526
*/
527
public static final double TRANSPORT_CLIENT_RATIO = 0.3;
528
529
/**
530
* Default minimum number of master-eligible nodes for test clusters.
531
*/
532
public static final int DEFAULT_MIN_MASTER_NODES = 1;
533
534
/**
535
* Default settings applied to all test clusters.
536
*/
537
public static final Settings DEFAULT_SETTINGS = Settings.builder()
538
.put("discovery.type", "zen")
539
.put("transport.type", "netty4")
540
.put("http.type", "netty4")
541
.put("cluster.routing.rebalance.enable", "all")
542
.build();
543
}
544
```
545
546
### Advanced Cluster Management Example
547
548
```java
549
import org.elasticsearch.test.ESIntegTestCase;
550
import org.elasticsearch.cluster.node.DiscoveryNodeRole;
551
552
@ESIntegTestCase.ClusterScope(
553
scope = ESIntegTestCase.Scope.TEST,
554
numDataNodes = 5,
555
numClientNodes = 1
556
)
557
public class AdvancedClusterTest extends ESIntegTestCase {
558
559
public void testLargeClusterOperations() {
560
InternalTestCluster cluster = internalCluster();
561
562
// Verify cluster size
563
assertThat(cluster.numDataNodes(), equalTo(5));
564
565
// Test rolling restart of all data nodes
566
Set<String> dataNodes = cluster.getDataNodeNames();
567
for (String node : dataNodes) {
568
cluster.restartNode(node, new InternalTestCluster.RestartCallback() {
569
@Override
570
public Settings onNodeStopped(String nodeName) {
571
return Settings.builder()
572
.put("node.attr.generation", "2")
573
.build();
574
}
575
576
@Override
577
public boolean clearData(String nodeName) {
578
return false;
579
}
580
});
581
582
// Wait for cluster to stabilize after each restart
583
ensureGreen();
584
}
585
586
// Verify all nodes have new attribute
587
NodesInfoResponse nodesInfo = client().admin().cluster().prepareNodesInfo().get();
588
for (NodeInfo nodeInfo : nodesInfo.getNodes()) {
589
if (nodeInfo.getNode().isDataNode()) {
590
assertThat(nodeInfo.getNode().getAttributes().get("generation"), equalTo("2"));
591
}
592
}
593
}
594
595
public void testMasterFailover() {
596
InternalTestCluster cluster = internalCluster();
597
598
// Ensure at least three data nodes are running before taking the master down
599
cluster.ensureAtLeastNumDataNodes(3);
600
601
String originalMaster = cluster.getMasterName();
602
603
// Stop the current master
604
cluster.stopNode(originalMaster);
605
606
// Wait for new master election
607
cluster.waitForState(state -> {
608
DiscoveryNode master = state.nodes().getMasterNode();
609
return master != null && !master.getName().equals(originalMaster);
610
}, TimeValue.timeValueSeconds(30));
611
612
// Verify cluster is still functional
613
ensureGreen();
614
615
String newMaster = cluster.getMasterName();
616
assertThat(newMaster, not(equalTo(originalMaster)));
617
}
618
619
@Override
620
protected Settings nodeSettings(int nodeOrdinal) {
621
return Settings.builder()
622
.put(super.nodeSettings(nodeOrdinal))
623
.put("node.name", "test-node-" + nodeOrdinal)
624
.put("cluster.routing.allocation.disk.threshold_enabled", false)
625
.build();
626
}
627
}
628
```
629
630
## Best Practices
631
632
### Cluster Sizing
633
- Use minimum viable cluster size for your test scenarios
634
- Prefer single node tests (`ESSingleNodeTestCase`) when cluster features aren't needed
635
- Consider using `@ClusterScope(scope=TEST)` for tests that modify cluster state
636
637
### Node Management
638
- Always ensure cluster health after node operations (`ensureGreen()`)
639
- Use appropriate timeouts for cluster state changes
640
- Clean up custom nodes and settings between tests
641
642
### Performance Optimization
643
- Reuse clusters at suite level when possible (`Scope.SUITE`)
644
- Minimize cluster restarts and node operations
645
- Use targeted health checks rather than waiting for full green status
646
647
### Resource Management
648
- Properly close clients and resources
649
- Monitor test cluster resource usage in CI environments
650
- Use appropriate cleanup in tearDown methods
651
652
The cluster management capabilities provide comprehensive control over multi-node test scenarios, enabling testing of complex distributed system behaviors while maintaining test isolation and repeatability.