or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

assertions-matchers.md cluster-management.md core-testing.md data-generation.md index.md mock-implementations.md specialized-testing.md

cluster-management.mddocs/

0

# Cluster Management

1

2

The Elasticsearch test framework provides sophisticated cluster management capabilities through the `InternalTestCluster` class and related utilities. This enables testing of multi-node scenarios, cluster state management, node lifecycle operations, and distributed system behaviors.

3

4

## InternalTestCluster

5

6

The core cluster management implementation that handles multi-node test clusters with full lifecycle management.

7

8

```{ .api }

9

package org.elasticsearch.test;

10

11

import org.elasticsearch.client.internal.Client;

12

import org.elasticsearch.cluster.ClusterState;

13

import org.elasticsearch.cluster.node.DiscoveryNode;

14

import org.elasticsearch.cluster.service.ClusterService;

15

import org.elasticsearch.common.settings.Settings;

16

import org.elasticsearch.core.TimeValue;

17

import org.elasticsearch.node.Node;

18

import org.elasticsearch.plugins.Plugin;

19

20

/**

21

* Internal implementation of TestCluster providing comprehensive multi-node cluster management.

22

* Handles node startup, shutdown, configuration, and cluster state management for integration testing.

23

*/

24

public final class InternalTestCluster extends TestCluster {

25

26

/**

27

* Starts a new data node with random configuration.

28

*

29

* @return name of the started node

30

*/

31

public String startNode();

32

33

/**

34

* Starts a new node with the specified settings.

35

*

36

* @param settings node configuration settings

37

* @return name of the started node

38

*/

39

public String startNode(Settings settings);

40

41

/**

42

* Starts a data node with custom configuration.

43

*

44

* @param settings node settings

45

* @param version transport version for the node

46

* @return name of the started node

47

*/

48

public String startDataOnlyNode(Settings settings, TransportVersion version);

49

50

/**

51

* Starts a master-eligible node.

52

*

53

* @param settings node configuration

54

* @return name of the started master node

55

*/

56

public String startMasterOnlyNode(Settings settings);

57

58

/**

59

* Starts a coordinating-only node (no data, no master).

60

*

61

* @param settings node configuration

62

* @return name of the started coordinating node

63

*/

64

public String startCoordinatingOnlyNode(Settings settings);

65

66

/**

67

* Stops a randomly selected node gracefully.

68

*

69

* The node to stop is chosen at random; this method takes no parameters.

70

* @return true if node was stopped successfully

71

*/

72

public boolean stopRandomNode();

73

74

/**

75

* Stops a specific node.

76

*

77

* @param node name of the node to stop

78

*/

79

public void stopNode(String node);

80

81

/**

82

* Restarts the specified node: the node is stopped, the callback is invoked, and the node is started again.

83

*

84

* @param node node name to restart

85

* @param callback optional callback executed during restart

86

* @return name of the restarted node

87

*/

88

public String restartNode(String node, InternalTestCluster.RestartCallback callback);

89

90

/**

91

* Restarts a random node in the cluster.

92

*

93

* @param callback optional restart callback

94

* @return name of the restarted node

95

*/

96

public String restartRandomNode(InternalTestCluster.RestartCallback callback);

97

98

/**

99

* Restarts all nodes in the cluster.

100

*

101

* @param callback callback executed for each node restart

102

*/

103

public void restartAllNodes(InternalTestCluster.RestartCallback callback);

104

105

/**

106

* Ensures the cluster has at least the specified number of data nodes.

107

* Starts additional nodes if necessary.

108

*

109

* @param n minimum number of data nodes required

110

*/

111

public void ensureAtLeastNumDataNodes(int n);

112

113

/**

114

* Ensures the cluster has at most the specified number of data nodes.

115

* Stops excess nodes if necessary.

116

*

117

* @param n maximum number of data nodes allowed

118

*/

119

public void ensureAtMostNumDataNodes(int n);

120

121

/**

122

* Returns a client connected to a specific node.

123

*

124

* @param nodeName name of the node

125

* @return client for the specified node

126

*/

127

public Client client(String nodeName);

128

129

/**

130

* Returns a client connected to a random node.

131

*

132

* @return client for a random node

133

*/

134

public Client client();

135

136

/**

137

* Returns a client that performs operations on the master node.

138

*

139

* @return master client

140

*/

141

public Client masterClient();

142

143

/**

144

* Returns a client connected to a non-master node.

145

*

146

* @return non-master client

147

*/

148

public Client nonMasterClient();

149

150

/**

151

* Returns a client connected to a data node.

152

*

153

* @return data node client

154

*/

155

public Client dataNodeClient();

156

157

/**

158

* Gets the current master node.

159

*

160

* @return name of the master node

161

*/

162

public String getMasterName();

163

164

/**

165

* Gets all node names in the cluster.

166

*

167

* @return set of all node names

168

*/

169

public Set<String> getNodeNames();

170

171

/**

172

* Gets names of all data nodes.

173

*

174

* @return set of data node names

175

*/

176

public Set<String> getDataNodeNames();

177

178

/**

179

* Gets names of all master-eligible nodes.

180

*

181

* @return set of master node names

182

*/

183

public Set<String> getMasterEligibleNodeNames();

184

185

/**

186

* Returns the ClusterService for a specific node.

187

*

188

* @param node node name

189

* @return ClusterService instance

190

*/

191

public ClusterService clusterService(String node);

192

193

/**

194

* Returns the current cluster state.

195

*

196

* @return current ClusterState

197

*/

198

public ClusterState clusterState();

199

200

/**

201

* Waits for the cluster to reach the specified state.

202

*

203

* @param statePredicate predicate to match desired state

204

* @param timeout maximum time to wait

205

*/

206

public void waitForState(Predicate<ClusterState> statePredicate, TimeValue timeout);

207

208

/**

209

* Waits for all nodes to agree on the cluster state.

210

*

211

* @param timeout maximum time to wait

212

*/

213

public void waitForConsensus(TimeValue timeout);

214

215

/**

216

* Forces a master election by stopping the current master.

217

*/

218

public void forceMasterElection();

219

220

/**

221

* Validates that the cluster is properly formed and healthy.

222

*/

223

public void validateClusterFormed();

224

225

/**

226

* Interface for callbacks executed during node restarts.

227

*/

228

public static interface RestartCallback {

229

/**

230

* Called when the node has been stopped; returns the settings to apply to the restarted node.

231

*

232

* @param nodeName name of the node that was stopped

233

* @return modified settings for restart

234

*/

235

Settings onNodeStopped(String nodeName);

236

237

/**

238

* Called before the node is restarted; return true to clear the node's data, or false to keep it across the restart.

239

*

240

* @param nodeName name of the node being restarted

241

*/

242

boolean clearData(String nodeName);

243

}

244

}

245

```

246

247

### InternalTestCluster Usage Examples

248

249

```java

250

import org.elasticsearch.test.ESIntegTestCase;

251

import org.elasticsearch.test.InternalTestCluster;

252

253

public class ClusterManagementTest extends ESIntegTestCase {

254

255

public void testMultiNodeCluster() {

256

InternalTestCluster cluster = internalCluster();

257

258

// Start with minimum nodes

259

cluster.ensureAtLeastNumDataNodes(3);

260

261

// Verify cluster formation

262

cluster.validateClusterFormed();

263

264

assertThat(cluster.size(), greaterThanOrEqualTo(3));

265

assertThat(cluster.numDataNodes(), greaterThanOrEqualTo(3));

266

}

267

268

public void testNodeLifecycle() {

269

InternalTestCluster cluster = internalCluster();

270

271

// Start additional nodes

272

String dataNode = cluster.startDataOnlyNode(Settings.EMPTY);

273

String masterNode = cluster.startMasterOnlyNode(Settings.EMPTY);

274

String coordinatingNode = cluster.startCoordinatingOnlyNode(Settings.EMPTY);

275

276

// Verify nodes are running

277

assertTrue(cluster.getNodeNames().contains(dataNode));

278

assertTrue(cluster.getNodeNames().contains(masterNode));

279

assertTrue(cluster.getNodeNames().contains(coordinatingNode));

280

281

// Test node restart

282

cluster.restartNode(dataNode, new InternalTestCluster.RestartCallback() {

283

@Override

284

public Settings onNodeStopped(String nodeName) {

285

return Settings.builder()

286

.put("node.attr.restarted", true)

287

.build();

288

}

289

290

@Override

291

public boolean clearData(String nodeName) {

292

return false; // Keep data during restart

293

}

294

});

295

296

// Verify node restarted successfully

297

assertTrue(cluster.getNodeNames().contains(dataNode));

298

}

299

300

public void testMasterElection() {

301

InternalTestCluster cluster = internalCluster();

302

303

// Start master-eligible nodes

304

cluster.ensureAtLeastNumDataNodes(3);

305

306

String originalMaster = cluster.getMasterName();

307

assertThat(originalMaster, notNullValue());

308

309

// Force master election

310

cluster.forceMasterElection();

311

312

// Wait for new master

313

cluster.waitForState(state ->

314

state.nodes().getMasterNode() != null,

315

TimeValue.timeValueSeconds(30)

316

);

317

318

String newMaster = cluster.getMasterName();

319

assertThat(newMaster, notNullValue());

320

}

321

322

public void testClientTypes() {

323

InternalTestCluster cluster = internalCluster();

324

cluster.ensureAtLeastNumDataNodes(2);

325

326

// Test different client types

327

Client masterClient = cluster.masterClient();

328

Client dataClient = cluster.dataNodeClient();

329

Client randomClient = cluster.client();

330

331

// All clients should be able to perform operations

332

masterClient.admin().cluster().prepareHealth().get();

333

dataClient.admin().cluster().prepareHealth().get();

334

randomClient.admin().cluster().prepareHealth().get();

335

}

336

}

337

```

338

339

## NodeConfigurationSource

340

341

Interface for providing custom node configurations during cluster setup.

342

343

```{ .api }

344

package org.elasticsearch.test;

345

346

import org.elasticsearch.common.settings.Settings;

347

import org.elasticsearch.plugins.Plugin;

348

349

/**

350

* Interface for customizing node configuration in test clusters.

351

* Allows injection of custom settings, plugins, and node-specific configuration.

352

*/

353

public interface NodeConfigurationSource {

354

355

/**

356

* Returns the settings that should be applied to the node with the given ordinal.

357

*

358

* @param nodeOrdinal ordinal number of the node being configured

359

* @return settings for the node

360

*/

361

Settings nodeSettings(int nodeOrdinal);

362

363

/**

364

* Returns plugins that should be installed on cluster nodes.

365

*

366

* @return collection of plugin classes

367

*/

368

Collection<Class<? extends Plugin>> nodePlugins();

369

370

/**

371

* Returns settings that should be applied to transport clients.

372

*

373

* @return transport client settings

374

*/

375

Settings transportClientSettings();

376

}

377

```

378

379

## ClusterService Access

380

381

Utilities for accessing and manipulating cluster services in tests.

382

383

```{ .api }

384

package org.elasticsearch.test;

385

386

import org.elasticsearch.cluster.service.ClusterService;

387

import org.elasticsearch.cluster.ClusterState;

388

import org.elasticsearch.cluster.ClusterStateUpdateTask;

389

390

/**

391

* Utilities for working with ClusterService in tests.

392

*/

393

public class ClusterServiceUtils {

394

395

/**

396

* Sets a new cluster state directly (bypassing normal update mechanisms).

397

* Use with caution - primarily for testing specific cluster states.

398

*

399

* @param clusterService target cluster service

400

* @param clusterState new cluster state to set

401

*/

402

public static void setState(ClusterService clusterService, ClusterState clusterState);

403

404

/**

405

* Adds a high priority cluster state update task.

406

*

407

* @param clusterService target cluster service

408

* @param reason reason for the update

409

* @param task update task to execute

410

*/

411

public static void submitStateUpdateTask(ClusterService clusterService,

412

String reason,

413

ClusterStateUpdateTask task);

414

415

/**

416

* Creates a ClusterService instance for testing with the specified initial state.

417

*

418

* @param initialState initial cluster state

419

* @return new ClusterService instance

420

*/

421

public static ClusterService createClusterService(ClusterState initialState);

422

423

/**

424

* Creates a minimal cluster state for testing.

425

*

426

* @param clusterName name of the test cluster

427

* @return basic cluster state

428

*/

429

public static ClusterState createClusterState(String clusterName);

430

}

431

```

432

433

## Discovery and Node Management

434

435

Utilities for managing node discovery and cluster formation in tests.

436

437

```{ .api }

438

package org.elasticsearch.cluster.node;

439

440

import org.elasticsearch.cluster.node.DiscoveryNode;

441

import org.elasticsearch.common.transport.TransportAddress;

442

443

/**

444

* Utilities for creating and managing discovery nodes in tests.

445

*/

446

public class DiscoveryNodeUtils {

447

448

/**

449

* Creates a new discovery node with random configuration.

450

*

451

* @param id node identifier

452

* @return configured DiscoveryNode

453

*/

454

public static DiscoveryNode create(String id);

455

456

/**

457

* Creates a discovery node with specific roles.

458

*

459

* @param id node identifier

460

* @param address transport address

461

* @param roles set of node roles

462

* @return configured DiscoveryNode

463

*/

464

public static DiscoveryNode create(String id,

465

TransportAddress address,

466

Set<DiscoveryNodeRole> roles);

467

468

/**

469

* Creates a master-eligible node.

470

*

471

* @param id node identifier

472

* @return master-eligible DiscoveryNode

473

*/

474

public static DiscoveryNode createMasterNode(String id);

475

476

/**

477

* Creates a data-only node.

478

*

479

* @param id node identifier

480

* @return data-only DiscoveryNode

481

*/

482

public static DiscoveryNode createDataNode(String id);

483

484

/**

485

* Creates a coordinating-only node.

486

*

487

* @param id node identifier

488

* @return coordinating-only DiscoveryNode

489

*/

490

public static DiscoveryNode createCoordinatingNode(String id);

491

492

/**

493

* Generates a random transport address for testing.

494

*

495

* @return random TransportAddress

496

*/

497

public static TransportAddress randomAddress();

498

}

499

```

500

501

## Cluster Scope Configuration

502

503

Advanced cluster configuration options for fine-tuning test cluster behavior.

504

505

```{ .api }

506

package org.elasticsearch.test;

507

508

/**

509

* Configuration options for test cluster scope and behavior.

510

*/

511

public class ClusterScope {

512

513

/**

514

* Defines when cluster instances are created and destroyed.

515

*/

516

public enum Scope {

517

/** Single cluster instance for entire test suite */

518

SUITE,

519

/** New cluster instance for each test method */

520

TEST

521

}

522

523

/**

524

* Transport client usage ratio for testing client behavior.

525

* 0.0 = always use node clients, 1.0 = always use transport clients

526

*/

527

public static final double TRANSPORT_CLIENT_RATIO = 0.3;

528

529

/**

530

* Default minimum number of master-eligible nodes for test clusters.

531

*/

532

public static final int DEFAULT_MIN_MASTER_NODES = 1;

533

534

/**

535

* Default settings applied to all test clusters.

536

*/

537

public static final Settings DEFAULT_SETTINGS = Settings.builder()

538

.put("discovery.type", "zen")

539

.put("transport.type", "netty4")

540

.put("http.type", "netty4")

541

.put("cluster.routing.rebalance.enable", "all")

542

.build();

543

}

544

```

545

546

### Advanced Cluster Management Example

547

548

```java

549

import org.elasticsearch.test.ESIntegTestCase;

550

import org.elasticsearch.cluster.node.DiscoveryNodeRole;

551

552

@ESIntegTestCase.ClusterScope(

553

scope = ESIntegTestCase.Scope.TEST,

554

numDataNodes = 5,

555

numClientNodes = 1

556

)

557

public class AdvancedClusterTest extends ESIntegTestCase {

558

559

public void testLargeClusterOperations() {

560

InternalTestCluster cluster = internalCluster();

561

562

// Verify cluster size

563

assertThat(cluster.numDataNodes(), equalTo(5));

564

565

// Test rolling restart of all data nodes

566

Set<String> dataNodes = cluster.getDataNodeNames();

567

for (String node : dataNodes) {

568

cluster.restartNode(node, new InternalTestCluster.RestartCallback() {

569

@Override

570

public Settings onNodeStopped(String nodeName) {

571

return Settings.builder()

572

.put("node.attr.generation", "2")

573

.build();

574

}

575

576

@Override

577

public boolean clearData(String nodeName) {

578

return false;

579

}

580

});

581

582

// Wait for cluster to stabilize after each restart

583

ensureGreen();

584

}

585

586

// Verify all nodes have new attribute

587

NodesInfoResponse nodesInfo = client().admin().cluster().prepareNodesInfo().get();

588

for (NodeInfo nodeInfo : nodesInfo.getNodes()) {

589

if (nodeInfo.getNode().isDataNode()) {

590

assertThat(nodeInfo.getNode().getAttributes().get("generation"), equalTo("2"));

591

}

592

}

593

}

594

595

public void testMasterFailover() {

596

InternalTestCluster cluster = internalCluster();

597

598

// Ensure we have multiple master-eligible nodes

599

cluster.ensureAtLeastNumDataNodes(3);

600

601

String originalMaster = cluster.getMasterName();

602

603

// Stop the current master

604

cluster.stopNode(originalMaster);

605

606

// Wait for new master election

607

cluster.waitForState(state -> {

608

DiscoveryNode master = state.nodes().getMasterNode();

609

return master != null && !master.getName().equals(originalMaster);

610

}, TimeValue.timeValueSeconds(30));

611

612

// Verify cluster is still functional

613

ensureGreen();

614

615

String newMaster = cluster.getMasterName();

616

assertThat(newMaster, not(equalTo(originalMaster)));

617

}

618

619

@Override

620

protected Settings nodeSettings(int nodeOrdinal) {

621

return Settings.builder()

622

.put(super.nodeSettings(nodeOrdinal))

623

.put("node.name", "test-node-" + nodeOrdinal)

624

.put("cluster.routing.allocation.disk.threshold_enabled", false)

625

.build();

626

}

627

}

628

```

629

630

## Best Practices

631

632

### Cluster Sizing

633

- Use minimum viable cluster size for your test scenarios

634

- Prefer single node tests (`ESSingleNodeTestCase`) when cluster features aren't needed

635

- Consider using `@ClusterScope(scope=TEST)` for tests that modify cluster state

636

637

### Node Management

638

- Always ensure cluster health after node operations (`ensureGreen()`)

639

- Use appropriate timeouts for cluster state changes

640

- Clean up custom nodes and settings between tests

641

642

### Performance Optimization

643

- Reuse clusters at suite level when possible (`Scope.SUITE`)

644

- Minimize cluster restarts and node operations

645

- Use targeted health checks rather than waiting for full green status

646

647

### Resource Management

648

- Properly close clients and resources

649

- Monitor test cluster resource usage in CI environments

650

- Use appropriate cleanup in tearDown methods

651

652

The cluster management capabilities provide comprehensive control over multi-node test scenarios, enabling testing of complex distributed system behaviors while maintaining test isolation and repeatability.