or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

assertions-matchers.md cluster-management.md core-testing.md data-generation.md index.md mock-implementations.md specialized-testing.md

docs/data-generation.md

0

# Data Generation

1

2

The Elasticsearch test framework provides comprehensive data generation utilities for creating realistic test documents, field data, and synthetic datasets. These utilities enable testing with varied, randomized data that closely mimics production scenarios.

3

4

## Document Generation

5

6

### DocumentGenerator

7

8

Core utility for generating synthetic documents with configurable schemas and data patterns.

9

10

```{ .api }

11

package org.elasticsearch.logsdb.datageneration;

12

13

import org.elasticsearch.xcontent.XContentBuilder;

14

import org.elasticsearch.xcontent.XContentType;

15

16

/**

17

* Generates synthetic documents for testing with configurable field types,

18

* data distributions, and document structures. Supports complex nested

19

* objects and realistic data patterns.

20

*/

21

public class DocumentGenerator {

22

23

/**

24

* Configuration for document generation behavior.

25

*/

26

public static class Config {

27

/** Maximum nesting depth for objects */

28

public final int maxObjectDepth;

29

/** Maximum number of fields per object */

30

public final int maxFields;

31

/** Probability of generating null values */

32

public final double nullValueProbability;

33

/** Field name generation strategy */

34

public final FieldNamingStrategy namingStrategy;

35

36

public Config(int maxObjectDepth,

37

int maxFields,

38

double nullValueProbability,

39

FieldNamingStrategy namingStrategy);

40

}

41

42

/**

43

* Creates a DocumentGenerator with the specified configuration.

44

*

45

* @param config generation configuration

46

*/

47

public DocumentGenerator(Config config);

48

49

/**

50

* Creates a DocumentGenerator with default configuration.

51

*/

52

public DocumentGenerator();

53

54

/**

55

* Generates a single document with random structure and content.

56

*

57

* @return generated document as XContent bytes

58

* @throws IOException on serialization failure

59

*/

60

public BytesReference generate() throws IOException;

61

62

/**

63

* Generates a document with the specified XContent type.

64

*

65

* @param contentType XContent format to use

66

* @return generated document as XContent bytes

67

* @throws IOException on serialization failure

68

*/

69

public BytesReference generate(XContentType contentType) throws IOException;

70

71

/**

72

* Generates a document following the provided template structure.

73

*

74

* @param template document template defining structure

75

* @return generated document matching template

76

* @throws IOException on serialization failure

77

*/

78

public BytesReference generateFromTemplate(Template template) throws IOException;

79

80

/**

81

* Generates multiple documents with consistent schema but varied content.

82

*

83

* @param count number of documents to generate

84

* @return list of generated documents

85

* @throws IOException on generation failure

86

*/

87

public List<BytesReference> generateBatch(int count) throws IOException;

88

89

/**

90

* Generates a document and writes it directly to an XContentBuilder.

91

*

92

* @param builder XContent builder to write to

93

* @throws IOException on write failure

94

*/

95

public void generateInto(XContentBuilder builder) throws IOException;

96

97

/**

98

* Generates document source suitable for indexing operations.

99

*

100

* @return document source as Map

101

* @throws IOException on generation failure

102

*/

103

public Map<String, Object> generateSource() throws IOException;

104

105

/**

106

* Sets a seed for reproducible document generation.

107

*

108

* @param seed random seed for reproducible generation

109

* @return this DocumentGenerator for fluent configuration

110

*/

111

public DocumentGenerator withSeed(long seed);

112

113

/**

114

* Configures the generator to use predefined field configurations.

115

*

116

* @param fields predefined field configurations

117

* @return this DocumentGenerator for fluent configuration

118

*/

119

public DocumentGenerator withPredefinedFields(List<PredefinedField> fields);

120

121

/**

122

* Configures dynamic mapping behavior for generated documents.

123

*

124

* @param dynamicMapping dynamic mapping configuration

125

* @return this DocumentGenerator for fluent configuration

126

*/

127

public DocumentGenerator withDynamicMapping(DynamicMapping dynamicMapping);

128

}

129

```

130

131

### Template

132

133

Document template system for defining consistent document structures.

134

135

```{ .api }

136

package org.elasticsearch.logsdb.datageneration;

137

138

import java.util.Map;

139

import java.util.function.Supplier;

140

141

/**

142

* Defines a template for document generation with specified field types,

143

* constraints, and generation rules. Enables consistent document structure

144

* across multiple generated documents.

145

*/

146

public class Template {

147

148

/**

149

* Builder for creating document templates with fluent configuration.

150

*/

151

public static class Builder {

152

153

/**

154

* Adds a text field to the template.

155

*

156

* @param fieldName name of the field

157

* @param config text field configuration

158

* @return builder for fluent configuration

159

*/

160

public Builder addTextField(String fieldName, TextFieldConfig config);

161

162

/**

163

* Adds a keyword field to the template.

164

*

165

* @param fieldName name of the field

166

* @param values possible keyword values

167

* @return builder for fluent configuration

168

*/

169

public Builder addKeywordField(String fieldName, String... values);

170

171

/**

172

* Adds a numeric field to the template.

173

*

174

* @param fieldName name of the field

175

* @param type numeric type (int, long, float, double)

176

* @param min minimum value (inclusive)

177

* @param max maximum value (inclusive)

178

* @return builder for fluent configuration

179

*/

180

public Builder addNumericField(String fieldName,

181

NumericType type,

182

Number min,

183

Number max);

184

185

/**

186

* Adds a date field to the template.

187

*

188

* @param fieldName name of the field

189

* @param format date format pattern

190

* @param startDate earliest possible date

191

* @param endDate latest possible date

192

* @return builder for fluent configuration

193

*/

194

public Builder addDateField(String fieldName,

195

String format,

196

Instant startDate,

197

Instant endDate);

198

199

/**

200

* Adds a boolean field to the template.

201

*

202

* @param fieldName name of the field

203

* @param trueProbability probability of true values (0.0 to 1.0)

204

* @return builder for fluent configuration

205

*/

206

public Builder addBooleanField(String fieldName, double trueProbability);

207

208

/**

209

* Adds a nested object field to the template.

210

*

211

* @param fieldName name of the field

212

* @param nestedTemplate template for nested object structure

213

* @return builder for fluent configuration

214

*/

215

public Builder addObjectField(String fieldName, Template nestedTemplate);

216

217

/**

218

* Adds an array field to the template.

219

*

220

* @param fieldName name of the field

221

* @param elementTemplate template for array elements

222

* @param minSize minimum array size

223

* @param maxSize maximum array size

224

* @return builder for fluent configuration

225

*/

226

public Builder addArrayField(String fieldName,

227

Template elementTemplate,

228

int minSize,

229

int maxSize);

230

231

/**

232

* Adds a custom field with a value generator function.

233

*

234

* @param fieldName name of the field

235

* @param generator function to generate field values

236

* @return builder for fluent configuration

237

*/

238

public Builder addCustomField(String fieldName, Supplier<Object> generator);

239

240

/**

241

* Sets the probability that fields will be present in generated documents.

242

*

243

* @param probability field presence probability (0.0 to 1.0)

244

* @return builder for fluent configuration

245

*/

246

public Builder withFieldPresenceProbability(double probability);

247

248

/**

249

* Builds the template from the configured fields.

250

*

251

* @return configured Template instance

252

*/

253

public Template build();

254

}

255

256

/**

257

* Creates a new template builder.

258

*

259

* @return Template.Builder for fluent configuration

260

*/

261

public static Builder builder();

262

263

/**

264

* Generates a document following this template.

265

*

266

* @param generator document generator to use

267

* @return generated document

268

* @throws IOException on generation failure

269

*/

270

public BytesReference generate(DocumentGenerator generator) throws IOException;

271

272

/**

273

* Returns the field configurations defined in this template.

274

*

275

* @return map of field name to field configuration

276

*/

277

public Map<String, FieldConfig> getFields();

278

279

/**

280

* Validates that the template configuration is valid.

281

*

282

* @throws IllegalArgumentException if template is invalid

283

*/

284

public void validate();

285

}

286

```

287

288

## Field Data Generation

289

290

### FieldDataGenerator

291

292

Base class for generating data for specific field types.

293

294

```{ .api }

295

package org.elasticsearch.logsdb.datageneration;

296

297

/**

298

* Abstract base class for generating data for specific Elasticsearch field types.

299

* Provides extensible framework for custom field data generation strategies.

300

*/

301

public abstract class FieldDataGenerator {

302

303

/**

304

* Configuration for field data generation.

305

*/

306

protected final FieldConfig config;

307

308

/**

309

* Creates a field data generator with the specified configuration.

310

*

311

* @param config field generation configuration

312

*/

313

protected FieldDataGenerator(FieldConfig config);

314

315

/**

316

* Generates a single field value.

317

*

318

* @return generated field value

319

*/

320

public abstract Object generateValue();

321

322

/**

323

* Generates multiple field values for array fields.

324

*

325

* @param count number of values to generate

326

* @return list of generated values

327

*/

328

public List<Object> generateValues(int count);

329

330

/**

331

* Indicates whether this field type supports null values.

332

*

333

* @return true if null values are supported

334

*/

335

public abstract boolean supportsNulls();

336

337

/**

338

* Generates a null value if supported by this field type.

339

*

340

* @return null or appropriate null representation

341

*/

342

public Object generateNull();

343

344

/**

345

* Creates a field data generator for the specified Elasticsearch field type.

346

*

347

* @param fieldType Elasticsearch field type name

348

* @param config field configuration

349

* @return appropriate FieldDataGenerator implementation

350

*/

351

public static FieldDataGenerator forFieldType(String fieldType, FieldConfig config);

352

}

353

```

354

355

### Specialized Field Generators

356

357

```{ .api }

358

package org.elasticsearch.logsdb.datageneration.fields;

359

360

/**

361

* Generator for text field data with realistic content patterns.

362

*/

363

public class TextFieldGenerator extends FieldDataGenerator {

364

365

/**

366

* Configuration for text field generation.

367

*/

368

public static class TextConfig extends FieldConfig {

369

/** Minimum text length */

370

public final int minLength;

371

/** Maximum text length */

372

public final int maxLength;

373

/** Text generation strategy */

374

public final TextGenerationStrategy strategy;

375

/** Language for generated text */

376

public final Locale locale;

377

378

public TextConfig(int minLength,

379

int maxLength,

380

TextGenerationStrategy strategy,

381

Locale locale);

382

}

383

384

/**

385

* Enumeration of text generation strategies.

386

*/

387

public enum TextGenerationStrategy {

388

/** Random alphanumeric strings */

389

ALPHANUMERIC,

390

/** Lorem ipsum style text */

391

LOREM_IPSUM,

392

/** Realistic sentences with grammar */

393

SENTENCES,

394

/** Log message patterns */

395

LOG_MESSAGES,

396

/** Email addresses */

397

EMAIL_ADDRESSES,

398

/** URLs */

399

URLS

400

}

401

402

@Override

403

public Object generateValue() {

404

TextConfig config = (TextConfig) this.config;

405

406

switch (config.strategy) {

407

case ALPHANUMERIC:

408

return generateAlphanumeric(config.minLength, config.maxLength);

409

case LOREM_IPSUM:

410

return generateLoremIpsum(config.minLength, config.maxLength);

411

case SENTENCES:

412

return generateSentences(config.minLength, config.maxLength, config.locale);

413

case LOG_MESSAGES:

414

return generateLogMessage();

415

case EMAIL_ADDRESSES:

416

return generateEmailAddress();

417

case URLS:

418

return generateUrl();

419

default:

420

throw new IllegalArgumentException("Unknown strategy: " + config.strategy);

421

}

422

}

423

}

424

425

/**

426

* Generator for keyword field data with controlled vocabularies.

427

*/

428

public class KeywordFieldGenerator extends FieldDataGenerator {

429

430

/**

431

* Configuration for keyword field generation.

432

*/

433

public static class KeywordConfig extends FieldConfig {

434

/** Predefined vocabulary of possible values */

435

public final List<String> vocabulary;

436

/** Whether to generate values outside vocabulary */

437

public final boolean allowCustomValues;

438

/** Distribution strategy for value selection */

439

public final DistributionStrategy distribution;

440

441

public KeywordConfig(List<String> vocabulary,

442

boolean allowCustomValues,

443

DistributionStrategy distribution);

444

}

445

446

/**

447

* Value distribution strategies for keyword generation.

448

*/

449

public enum DistributionStrategy {

450

/** Uniform random selection */

451

UNIFORM,

452

/** Zipfian distribution (some values more frequent) */

453

ZIPFIAN,

454

/** Normal distribution around median values */

455

NORMAL

456

}

457

458

@Override

459

public Object generateValue() {

460

KeywordConfig config = (KeywordConfig) this.config;

461

462

if (config.allowCustomValues && random().nextDouble() < 0.1) {

463

// 10% chance of generating custom value

464

return "custom_" + randomAlphaOfLength(8);

465

}

466

467

return selectFromVocabulary(config.vocabulary, config.distribution);

468

}

469

}

470

471

/**

472

* Generator for numeric field data with realistic distributions.

473

*/

474

public class NumericFieldGenerator extends FieldDataGenerator {

475

476

/**

477

* Configuration for numeric field generation.

478

*/

479

public static class NumericConfig extends FieldConfig {

480

/** Numeric field type */

481

public final NumericType type;

482

/** Minimum value (inclusive) */

483

public final Number minValue;

484

/** Maximum value (inclusive) */

485

public final Number maxValue;

486

/** Distribution type for value generation */

487

public final NumericDistribution distribution;

488

489

public NumericConfig(NumericType type,

490

Number minValue,

491

Number maxValue,

492

NumericDistribution distribution);

493

}

494

495

/**

496

* Numeric field types supported by Elasticsearch.

497

*/

498

public enum NumericType {

499

BYTE, SHORT, INTEGER, LONG, FLOAT, DOUBLE, SCALED_FLOAT

500

}

501

502

/**

503

* Distribution patterns for numeric value generation.

504

*/

505

public enum NumericDistribution {

506

/** Uniform random distribution */

507

UNIFORM,

508

/** Normal (Gaussian) distribution */

509

NORMAL,

510

/** Exponential distribution */

511

EXPONENTIAL,

512

/** Log-normal distribution */

513

LOG_NORMAL

514

}

515

516

@Override

517

public Object generateValue() {

518

NumericConfig config = (NumericConfig) this.config;

519

520

double rawValue = generateDistributedValue(config.distribution,

521

config.minValue.doubleValue(),

522

config.maxValue.doubleValue());

523

524

return convertToTargetType(rawValue, config.type);

525

}

526

}

527

```

528

529

### PredefinedField

530

531

Configuration for predefined field types with specific generation rules.

532

533

```{ .api }

534

package org.elasticsearch.logsdb.datageneration;

535

536

/**

537

* Represents a predefined field configuration with specific generation rules

538

* and constraints. Used to create consistent field behavior across documents.

539

*/

540

public class PredefinedField {

541

542

/** Field name */

543

private final String name;

544

/** Field type (text, keyword, long, etc.) */

545

private final String type;

546

/** Field-specific configuration */

547

private final FieldConfig config;

548

/** Mapping properties for this field */

549

private final Map<String, Object> mapping;

550

551

/**

552

* Creates a predefined field configuration.

553

*

554

* @param name field name

555

* @param type Elasticsearch field type

556

* @param config field generation configuration

557

* @param mapping field mapping properties

558

*/

559

public PredefinedField(String name,

560

String type,

561

FieldConfig config,

562

Map<String, Object> mapping);

563

564

/**

565

* Creates a text field with default configuration.

566

*

567

* @param name field name

568

* @return configured PredefinedField for text

569

*/

570

public static PredefinedField text(String name);

571

572

/**

573

* Creates a text field with custom configuration.

574

*

575

* @param name field name

576

* @param analyzer analyzer to use

577

* @param minLength minimum text length

578

* @param maxLength maximum text length

579

* @return configured PredefinedField for text

580

*/

581

public static PredefinedField text(String name,

582

String analyzer,

583

int minLength,

584

int maxLength);

585

586

/**

587

* Creates a keyword field with specified vocabulary.

588

*

589

* @param name field name

590

* @param vocabulary possible keyword values

591

* @return configured PredefinedField for keyword

592

*/

593

public static PredefinedField keyword(String name, String... vocabulary);

594

595

/**

596

* Creates a long field with specified range.

597

*

598

* @param name field name

599

* @param min minimum value

600

* @param max maximum value

601

* @return configured PredefinedField for long

602

*/

603

public static PredefinedField longField(String name, long min, long max);

604

605

/**

606

* Creates a date field with specified format and range.

607

*

608

* @param name field name

609

* @param format date format pattern

610

* @param start earliest date

611

* @param end latest date

612

* @return configured PredefinedField for date

613

*/

614

public static PredefinedField dateField(String name,

615

String format,

616

Instant start,

617

Instant end);

618

619

/**

620

* Creates a boolean field with specified true probability.

621

*

622

* @param name field name

623

* @param trueProbability probability of true values (0.0 to 1.0)

624

* @return configured PredefinedField for boolean

625

*/

626

public static PredefinedField booleanField(String name, double trueProbability);

627

628

/**

629

* Creates a geo_point field with specified bounds.

630

*

631

* @param name field name

632

* @param bounds geographic bounding box

633

* @return configured PredefinedField for geo_point

634

*/

635

public static PredefinedField geoPoint(String name, Rectangle bounds);

636

637

/**

638

* Creates an IP address field.

639

*

640

* @param name field name

641

* @param ipVersion IP version (4 or 6)

642

* @return configured PredefinedField for ip

643

*/

644

public static PredefinedField ipField(String name, int ipVersion);

645

646

/**

647

* Generates a value for this field using the configured generator.

648

*

649

* @return generated field value

650

*/

651

public Object generateValue();

652

653

/**

654

* Returns the field mapping configuration.

655

*

656

* @return field mapping as Map

657

*/

658

public Map<String, Object> getMapping();

659

660

/**

661

* Returns the field name.

662

*

663

* @return field name

664

*/

665

public String getName();

666

667

/**

668

* Returns the Elasticsearch field type.

669

*

670

* @return field type

671

*/

672

public String getType();

673

}

674

```

675

676

## Usage Examples

677

678

### Basic Document Generation

679

680

```java

681

import org.elasticsearch.logsdb.datageneration.DocumentGenerator;

682

import org.elasticsearch.logsdb.datageneration.Template;

683

684

public class DataGenerationTest extends ESTestCase {

685

686

public void testBasicDocumentGeneration() throws IOException {

687

DocumentGenerator generator = new DocumentGenerator();

688

689

// Generate a single random document

690

BytesReference document = generator.generate();

691

assertThat(document, notNullValue());

692

693

// Parse and validate structure

694

XContentParser parser = createParser(XContentType.JSON, document);

695

Map<String, Object> source = parser.map();

696

assertThat(source.size(), greaterThan(0));

697

}

698

699

public void testTemplateBasedGeneration() throws IOException {

700

Template template = Template.builder()

701

.addTextField("title", new TextFieldGenerator.TextConfig(

702

10, 50, TextFieldGenerator.TextGenerationStrategy.SENTENCES, Locale.ENGLISH))

703

.addKeywordField("category", "news", "sports", "technology", "entertainment")

704

.addNumericField("views", NumericFieldGenerator.NumericType.LONG, 0L, 10000L)

705

.addDateField("published", "yyyy-MM-dd'T'HH:mm:ss.SSSZ",

706

Instant.now().minus(365, ChronoUnit.DAYS), Instant.now())

707

.addBooleanField("featured", 0.2) // 20% chance of being featured

708

.build();

709

710

DocumentGenerator generator = new DocumentGenerator();

711

712

// Generate multiple documents with consistent schema

713

for (int i = 0; i < 10; i++) {

714

BytesReference doc = template.generate(generator);

715

716

XContentParser parser = createParser(XContentType.JSON, doc);

717

Map<String, Object> source = parser.map();

718

719

// Verify expected fields are present

720

assertTrue("Should have title", source.containsKey("title"));

721

assertTrue("Should have category", source.containsKey("category"));

722

assertTrue("Should have views", source.containsKey("views"));

723

724

// Verify field types and constraints

725

assertThat(source.get("title"), instanceOf(String.class));

726

assertThat(source.get("category"), anyOf(

727

equalTo("news"), equalTo("sports"),

728

equalTo("technology"), equalTo("entertainment")));

729

730

Long views = (Long) source.get("views");

731

assertThat(views, allOf(greaterThanOrEqualTo(0L), lessThanOrEqualTo(10000L)));

732

}

733

}

734

735

public void testPredefinedFieldGeneration() throws IOException {

736

List<PredefinedField> fields = Arrays.asList(

737

PredefinedField.text("message").withStrategy(LOG_MESSAGES),

738

PredefinedField.keyword("level", "DEBUG", "INFO", "WARN", "ERROR"),

739

PredefinedField.longField("timestamp",

740

Instant.now().minus(7, ChronoUnit.DAYS).toEpochMilli(),

741

Instant.now().toEpochMilli()),

742

PredefinedField.ipField("client_ip", 4),

743

PredefinedField.geoPoint("location", new Rectangle(-180, 180, -90, 90))

744

);

745

746

DocumentGenerator generator = new DocumentGenerator()

747

.withPredefinedFields(fields)

748

.withSeed(12345); // Reproducible generation

749

750

// Generate log-like documents

751

List<BytesReference> documents = generator.generateBatch(100);

752

753

assertThat(documents.size(), equalTo(100));

754

755

// Verify all documents have expected structure

756

for (BytesReference doc : documents) {

757

XContentParser parser = createParser(XContentType.JSON, doc);

758

Map<String, Object> source = parser.map();

759

760

assertThat(source, hasKey("message"));

761

assertThat(source, hasKey("level"));

762

assertThat(source, hasKey("timestamp"));

763

assertThat(source, hasKey("client_ip"));

764

assertThat(source, hasKey("location"));

765

}

766

}

767

}

768

```

769

770

### Integration with Index Creation

771

772

```java

773

public class DocumentGenerationIntegrationTest extends ESIntegTestCase {

774

775

public void testGeneratedDataIndexing() throws IOException {

776

// Create index with mapping for generated data

777

String mapping = """

778

{

779

"properties": {

780

"title": {"type": "text", "analyzer": "english"},

781

"category": {"type": "keyword"},

782

"views": {"type": "long"},

783

"published": {"type": "date"},

784

"featured": {"type": "boolean"},

785

"tags": {"type": "keyword"}

786

}

787

}""";

788

789

createIndex("test-index", Settings.EMPTY, mapping);

790

791

// Generate and index documents

792

Template template = Template.builder()

793

.addTextField("title", new TextFieldGenerator.TextConfig(

794

20, 100, TextFieldGenerator.TextGenerationStrategy.SENTENCES, Locale.ENGLISH))

795

.addKeywordField("category", "technology", "science", "business")

796

.addNumericField("views", NumericFieldGenerator.NumericType.LONG, 1L, 100000L)

797

.addDateField("published", "yyyy-MM-dd",

798

Instant.parse("2023-01-01T00:00:00Z"), Instant.now())

799

.addBooleanField("featured", 0.15)

800

.addArrayField("tags",

801

Template.builder().addKeywordField("tag",

802

"ai", "machine-learning", "elasticsearch", "search", "analytics").build(),

803

1, 5)

804

.build();

805

806

DocumentGenerator generator = new DocumentGenerator();

807

808

// Index generated documents

809

BulkRequestBuilder bulk = client().prepareBulk();

810

for (int i = 0; i < 1000; i++) {

811

Map<String, Object> source = generator.generateSource();

812

bulk.add(client().prepareIndex("test-index")

813

.setId(String.valueOf(i))

814

.setSource(source));

815

}

816

817

BulkResponse bulkResponse = bulk.get();

818

assertNoFailures(bulkResponse);

819

820

refresh("test-index");

821

822

// Verify generated data is searchable

823

SearchResponse response = client().prepareSearch("test-index")

824

.setQuery(QueryBuilders.matchQuery("title", "test"))

825

.setSize(0)

826

.get();

827

828

assertThat(response.getHits().getTotalHits().value, greaterThan(0L));

829

830

// Verify aggregations work on generated data

831

response = client().prepareSearch("test-index")

832

.setQuery(QueryBuilders.matchAllQuery())

833

.addAggregation(AggregationBuilders.terms("categories").field("category"))

834

.addAggregation(AggregationBuilders.stats("view_stats").field("views"))

835

.get();

836

837

Terms categories = response.getAggregations().get("categories");

838

assertThat(categories.getBuckets().size(), equalTo(3));

839

840

Stats viewStats = response.getAggregations().get("view_stats");

841

assertThat(viewStats.getCount(), equalTo(1000L));

842

assertThat(viewStats.getMin(), greaterThanOrEqualTo(1.0));

843

assertThat(viewStats.getMax(), lessThanOrEqualTo(100000.0));

844

}

845

}

846

```

847

848

## Best Practices

849

850

### Data Realism

851

- Use realistic value distributions rather than purely random data

852

- Include edge cases and boundary values in generated data

853

- Model real-world data patterns and relationships

854

855

### Performance Optimization

856

- Use seeds for reproducible test data when debugging

857

- Generate data in batches for better performance

858

- Cache expensive generators when possible

859

860

### Schema Design

861

- Define clear templates for consistent document structure

862

- Use predefined fields for common data types

863

- Include proper field mapping configurations

864

865

### Testing Coverage

866

- Generate data that exercises all mapping types used in production

867

- Include null values and missing fields in test data

868

- Test with various document sizes and complexity levels

869

870

The data generation utilities provide powerful capabilities for creating realistic, varied test datasets that enable comprehensive testing of Elasticsearch functionality with production-like data patterns.