0
# Data Generation
1
2
The Elasticsearch test framework provides comprehensive data generation utilities for creating realistic test documents, field data, and synthetic datasets. These utilities enable testing with varied, randomized data that closely mimics production scenarios.
3
4
## Document Generation
5
6
### DocumentGenerator
7
8
Core utility for generating synthetic documents with configurable schemas and data patterns.
9
10
```{ .api }
11
package org.elasticsearch.logsdb.datageneration;
12
13
import org.elasticsearch.xcontent.XContentBuilder;
14
import org.elasticsearch.xcontent.XContentType;
15
16
/**
17
* Generates synthetic documents for testing with configurable field types,
18
* data distributions, and document structures. Supports complex nested
19
* objects and realistic data patterns.
20
*/
21
public class DocumentGenerator {
22
23
/**
24
* Configuration for document generation behavior.
25
*/
26
public static class Config {
27
/** Maximum nesting depth for objects */
28
public final int maxObjectDepth;
29
/** Maximum number of fields per object */
30
public final int maxFields;
31
/** Probability of generating null values */
32
public final double nullValueProbability;
33
/** Field name generation strategy */
34
public final FieldNamingStrategy namingStrategy;
35
36
public Config(int maxObjectDepth,
37
int maxFields,
38
double nullValueProbability,
39
FieldNamingStrategy namingStrategy);
40
}
41
42
/**
43
* Creates a DocumentGenerator with the specified configuration.
44
*
45
* @param config generation configuration
46
*/
47
public DocumentGenerator(Config config);
48
49
/**
50
* Creates a DocumentGenerator with default configuration.
51
*/
52
public DocumentGenerator();
53
54
/**
55
* Generates a single document with random structure and content.
56
*
57
* @return generated document as XContent bytes
58
* @throws IOException on serialization failure
59
*/
60
public BytesReference generate() throws IOException;
61
62
/**
63
* Generates a document with the specified XContent type.
64
*
65
* @param contentType XContent format to use
66
* @return generated document as XContent bytes
67
* @throws IOException on serialization failure
68
*/
69
public BytesReference generate(XContentType contentType) throws IOException;
70
71
/**
72
* Generates a document following the provided template structure.
73
*
74
* @param template document template defining structure
75
* @return generated document matching template
76
* @throws IOException on serialization failure
77
*/
78
public BytesReference generateFromTemplate(Template template) throws IOException;
79
80
/**
81
* Generates multiple documents with consistent schema but varied content.
82
*
83
* @param count number of documents to generate
84
* @return list of generated documents
85
* @throws IOException on generation failure
86
*/
87
public List<BytesReference> generateBatch(int count) throws IOException;
88
89
/**
90
* Generates a document and writes it directly to an XContentBuilder.
91
*
92
* @param builder XContent builder to write to
93
* @throws IOException on write failure
94
*/
95
public void generateInto(XContentBuilder builder) throws IOException;
96
97
/**
98
* Generates document source suitable for indexing operations.
99
*
100
* @return document source as Map
101
* @throws IOException on generation failure
102
*/
103
public Map<String, Object> generateSource() throws IOException;
104
105
/**
106
* Sets a seed for reproducible document generation.
107
*
108
* @param seed random seed for reproducible generation
109
* @return this DocumentGenerator for fluent configuration
110
*/
111
public DocumentGenerator withSeed(long seed);
112
113
/**
114
* Configures the generator to use predefined field configurations.
115
*
116
* @param fields predefined field configurations
117
* @return this DocumentGenerator for fluent configuration
118
*/
119
public DocumentGenerator withPredefinedFields(List<PredefinedField> fields);
120
121
/**
122
* Configures dynamic mapping behavior for generated documents.
123
*
124
* @param dynamicMapping dynamic mapping configuration
125
* @return this DocumentGenerator for fluent configuration
126
*/
127
public DocumentGenerator withDynamicMapping(DynamicMapping dynamicMapping);
128
}
129
```
130
131
### Template
132
133
Document template system for defining consistent document structures.
134
135
```{ .api }
136
package org.elasticsearch.logsdb.datageneration;
137
138
import java.util.Map;
139
import java.util.function.Supplier;
140
141
/**
142
* Defines a template for document generation with specified field types,
143
* constraints, and generation rules. Enables consistent document structure
144
* across multiple generated documents.
145
*/
146
public class Template {
147
148
/**
149
* Builder for creating document templates with fluent configuration.
150
*/
151
public static class Builder {
152
153
/**
154
* Adds a text field to the template.
155
*
156
* @param fieldName name of the field
157
* @param config text field configuration
158
* @return builder for fluent configuration
159
*/
160
public Builder addTextField(String fieldName, TextFieldConfig config);
161
162
/**
163
* Adds a keyword field to the template.
164
*
165
* @param fieldName name of the field
166
* @param values possible keyword values
167
* @return builder for fluent configuration
168
*/
169
public Builder addKeywordField(String fieldName, String... values);
170
171
/**
172
* Adds a numeric field to the template.
173
*
174
* @param fieldName name of the field
175
* @param type numeric type (int, long, float, double)
176
* @param min minimum value (inclusive)
177
* @param max maximum value (inclusive)
178
* @return builder for fluent configuration
179
*/
180
public Builder addNumericField(String fieldName,
181
NumericType type,
182
Number min,
183
Number max);
184
185
/**
186
* Adds a date field to the template.
187
*
188
* @param fieldName name of the field
189
* @param format date format pattern
190
* @param startDate earliest possible date
191
* @param endDate latest possible date
192
* @return builder for fluent configuration
193
*/
194
public Builder addDateField(String fieldName,
195
String format,
196
Instant startDate,
197
Instant endDate);
198
199
/**
200
* Adds a boolean field to the template.
201
*
202
* @param fieldName name of the field
203
* @param trueProbability probability of true values (0.0 to 1.0)
204
* @return builder for fluent configuration
205
*/
206
public Builder addBooleanField(String fieldName, double trueProbability);
207
208
/**
209
* Adds a nested object field to the template.
210
*
211
* @param fieldName name of the field
212
* @param nestedTemplate template for nested object structure
213
* @return builder for fluent configuration
214
*/
215
public Builder addObjectField(String fieldName, Template nestedTemplate);
216
217
/**
218
* Adds an array field to the template.
219
*
220
* @param fieldName name of the field
221
* @param elementTemplate template for array elements
222
* @param minSize minimum array size
223
* @param maxSize maximum array size
224
* @return builder for fluent configuration
225
*/
226
public Builder addArrayField(String fieldName,
227
Template elementTemplate,
228
int minSize,
229
int maxSize);
230
231
/**
232
* Adds a custom field with a value generator function.
233
*
234
* @param fieldName name of the field
235
* @param generator function to generate field values
236
* @return builder for fluent configuration
237
*/
238
public Builder addCustomField(String fieldName, Supplier<Object> generator);
239
240
/**
241
* Sets the probability that fields will be present in generated documents.
242
*
243
* @param probability field presence probability (0.0 to 1.0)
244
* @return builder for fluent configuration
245
*/
246
public Builder withFieldPresenceProbability(double probability);
247
248
/**
249
* Builds the template from the configured fields.
250
*
251
* @return configured Template instance
252
*/
253
public Template build();
254
}
255
256
/**
257
* Creates a new template builder.
258
*
259
* @return Template.Builder for fluent configuration
260
*/
261
public static Builder builder();
262
263
/**
264
* Generates a document following this template.
265
*
266
* @param generator document generator to use
267
* @return generated document
268
* @throws IOException on generation failure
269
*/
270
public BytesReference generate(DocumentGenerator generator) throws IOException;
271
272
/**
273
* Returns the field configurations defined in this template.
274
*
275
* @return map of field name to field configuration
276
*/
277
public Map<String, FieldConfig> getFields();
278
279
/**
280
* Validates that the template configuration is valid.
281
*
282
* @throws IllegalArgumentException if template is invalid
283
*/
284
public void validate();
285
}
286
```
287
288
## Field Data Generation
289
290
### FieldDataGenerator
291
292
Base class for generating data for specific field types.
293
294
```{ .api }
295
package org.elasticsearch.logsdb.datageneration;
296
297
/**
298
* Abstract base class for generating data for specific Elasticsearch field types.
299
* Provides extensible framework for custom field data generation strategies.
300
*/
301
public abstract class FieldDataGenerator {
302
303
/**
304
* Configuration for field data generation.
305
*/
306
protected final FieldConfig config;
307
308
/**
309
* Creates a field data generator with the specified configuration.
310
*
311
* @param config field generation configuration
312
*/
313
protected FieldDataGenerator(FieldConfig config);
314
315
/**
316
* Generates a single field value.
317
*
318
* @return generated field value
319
*/
320
public abstract Object generateValue();
321
322
/**
323
* Generates multiple field values for array fields.
324
*
325
* @param count number of values to generate
326
* @return list of generated values
327
*/
328
public List<Object> generateValues(int count);
329
330
/**
331
* Indicates whether this field type supports null values.
332
*
333
* @return true if null values are supported
334
*/
335
public abstract boolean supportsNulls();
336
337
/**
338
* Generates a null value if supported by this field type.
339
*
340
* @return null or appropriate null representation
341
*/
342
public Object generateNull();
343
344
/**
345
* Creates a field data generator for the specified Elasticsearch field type.
346
*
347
* @param fieldType Elasticsearch field type name
348
* @param config field configuration
349
* @return appropriate FieldDataGenerator implementation
350
*/
351
public static FieldDataGenerator forFieldType(String fieldType, FieldConfig config);
352
}
353
```
354
355
### Specialized Field Generators
356
357
```{ .api }
358
package org.elasticsearch.logsdb.datageneration.fields;
359
360
/**
361
* Generator for text field data with realistic content patterns.
362
*/
363
public class TextFieldGenerator extends FieldDataGenerator {
364
365
/**
366
* Configuration for text field generation.
367
*/
368
public static class TextConfig extends FieldConfig {
369
/** Minimum text length */
370
public final int minLength;
371
/** Maximum text length */
372
public final int maxLength;
373
/** Text generation strategy */
374
public final TextGenerationStrategy strategy;
375
/** Language for generated text */
376
public final Locale locale;
377
378
public TextConfig(int minLength,
379
int maxLength,
380
TextGenerationStrategy strategy,
381
Locale locale);
382
}
383
384
/**
385
* Enumeration of text generation strategies.
386
*/
387
public enum TextGenerationStrategy {
388
/** Random alphanumeric strings */
389
ALPHANUMERIC,
390
/** Lorem ipsum style text */
391
LOREM_IPSUM,
392
/** Realistic sentences with grammar */
393
SENTENCES,
394
/** Log message patterns */
395
LOG_MESSAGES,
396
/** Email addresses */
397
EMAIL_ADDRESSES,
398
/** URLs */
399
URLS
400
}
401
402
@Override
403
public Object generateValue() {
404
TextConfig config = (TextConfig) this.config;
405
406
switch (config.strategy) {
407
case ALPHANUMERIC:
408
return generateAlphanumeric(config.minLength, config.maxLength);
409
case LOREM_IPSUM:
410
return generateLoremIpsum(config.minLength, config.maxLength);
411
case SENTENCES:
412
return generateSentences(config.minLength, config.maxLength, config.locale);
413
case LOG_MESSAGES:
414
return generateLogMessage();
415
case EMAIL_ADDRESSES:
416
return generateEmailAddress();
417
case URLS:
418
return generateUrl();
419
default:
420
throw new IllegalArgumentException("Unknown strategy: " + config.strategy);
421
}
422
}
423
}
424
425
/**
426
* Generator for keyword field data with controlled vocabularies.
427
*/
428
public class KeywordFieldGenerator extends FieldDataGenerator {
429
430
/**
431
* Configuration for keyword field generation.
432
*/
433
public static class KeywordConfig extends FieldConfig {
434
/** Predefined vocabulary of possible values */
435
public final List<String> vocabulary;
436
/** Whether to generate values outside vocabulary */
437
public final boolean allowCustomValues;
438
/** Distribution strategy for value selection */
439
public final DistributionStrategy distribution;
440
441
public KeywordConfig(List<String> vocabulary,
442
boolean allowCustomValues,
443
DistributionStrategy distribution);
444
}
445
446
/**
447
* Value distribution strategies for keyword generation.
448
*/
449
public enum DistributionStrategy {
450
/** Uniform random selection */
451
UNIFORM,
452
/** Zipfian distribution (some values more frequent) */
453
ZIPFIAN,
454
/** Normal distribution around median values */
455
NORMAL
456
}
457
458
@Override
459
public Object generateValue() {
460
KeywordConfig config = (KeywordConfig) this.config;
461
462
if (config.allowCustomValues && random().nextDouble() < 0.1) {
463
// 10% chance of generating custom value
464
return "custom_" + randomAlphaOfLength(8);
465
}
466
467
return selectFromVocabulary(config.vocabulary, config.distribution);
468
}
469
}
470
471
/**
472
* Generator for numeric field data with realistic distributions.
473
*/
474
public class NumericFieldGenerator extends FieldDataGenerator {
475
476
/**
477
* Configuration for numeric field generation.
478
*/
479
public static class NumericConfig extends FieldConfig {
480
/** Numeric field type */
481
public final NumericType type;
482
/** Minimum value (inclusive) */
483
public final Number minValue;
484
/** Maximum value (inclusive) */
485
public final Number maxValue;
486
/** Distribution type for value generation */
487
public final NumericDistribution distribution;
488
489
public NumericConfig(NumericType type,
490
Number minValue,
491
Number maxValue,
492
NumericDistribution distribution);
493
}
494
495
/**
496
* Numeric field types supported by Elasticsearch.
497
*/
498
public enum NumericType {
499
BYTE, SHORT, INTEGER, LONG, FLOAT, DOUBLE, SCALED_FLOAT
500
}
501
502
/**
503
* Distribution patterns for numeric value generation.
504
*/
505
public enum NumericDistribution {
506
/** Uniform random distribution */
507
UNIFORM,
508
/** Normal (Gaussian) distribution */
509
NORMAL,
510
/** Exponential distribution */
511
EXPONENTIAL,
512
/** Log-normal distribution */
513
LOG_NORMAL
514
}
515
516
@Override
517
public Object generateValue() {
518
NumericConfig config = (NumericConfig) this.config;
519
520
double rawValue = generateDistributedValue(config.distribution,
521
config.minValue.doubleValue(),
522
config.maxValue.doubleValue());
523
524
return convertToTargetType(rawValue, config.type);
525
}
526
}
527
```
528
529
### PredefinedField
530
531
Configuration for predefined field types with specific generation rules.
532
533
```{ .api }
534
package org.elasticsearch.logsdb.datageneration;
535
536
/**
537
* Represents a predefined field configuration with specific generation rules
538
* and constraints. Used to create consistent field behavior across documents.
539
*/
540
public class PredefinedField {
541
542
/** Field name */
543
private final String name;
544
/** Field type (text, keyword, long, etc.) */
545
private final String type;
546
/** Field-specific configuration */
547
private final FieldConfig config;
548
/** Mapping properties for this field */
549
private final Map<String, Object> mapping;
550
551
/**
552
* Creates a predefined field configuration.
553
*
554
* @param name field name
555
* @param type Elasticsearch field type
556
* @param config field generation configuration
557
* @param mapping field mapping properties
558
*/
559
public PredefinedField(String name,
560
String type,
561
FieldConfig config,
562
Map<String, Object> mapping);
563
564
/**
565
* Creates a text field with default configuration.
566
*
567
* @param name field name
568
* @return configured PredefinedField for text
569
*/
570
public static PredefinedField text(String name);
571
572
/**
573
* Creates a text field with custom configuration.
574
*
575
* @param name field name
576
* @param analyzer analyzer to use
577
* @param minLength minimum text length
578
* @param maxLength maximum text length
579
* @return configured PredefinedField for text
580
*/
581
public static PredefinedField text(String name,
582
String analyzer,
583
int minLength,
584
int maxLength);
585
586
/**
587
* Creates a keyword field with specified vocabulary.
588
*
589
* @param name field name
590
* @param vocabulary possible keyword values
591
* @return configured PredefinedField for keyword
592
*/
593
public static PredefinedField keyword(String name, String... vocabulary);
594
595
/**
596
* Creates a long field with specified range.
597
*
598
* @param name field name
599
* @param min minimum value
600
* @param max maximum value
601
* @return configured PredefinedField for long
602
*/
603
public static PredefinedField longField(String name, long min, long max);
604
605
/**
606
* Creates a date field with specified format and range.
607
*
608
* @param name field name
609
* @param format date format pattern
610
* @param start earliest date
611
* @param end latest date
612
* @return configured PredefinedField for date
613
*/
614
public static PredefinedField dateField(String name,
615
String format,
616
Instant start,
617
Instant end);
618
619
/**
620
* Creates a boolean field with specified true probability.
621
*
622
* @param name field name
623
* @param trueProbability probability of true values (0.0 to 1.0)
624
* @return configured PredefinedField for boolean
625
*/
626
public static PredefinedField booleanField(String name, double trueProbability);
627
628
/**
629
* Creates a geo_point field with specified bounds.
630
*
631
* @param name field name
632
* @param bounds geographic bounding box
633
* @return configured PredefinedField for geo_point
634
*/
635
public static PredefinedField geoPoint(String name, Rectangle bounds);
636
637
/**
638
* Creates an IP address field.
639
*
640
* @param name field name
641
* @param ipVersion IP version (4 or 6)
642
* @return configured PredefinedField for ip
643
*/
644
public static PredefinedField ipField(String name, int ipVersion);
645
646
/**
647
* Generates a value for this field using the configured generator.
648
*
649
* @return generated field value
650
*/
651
public Object generateValue();
652
653
/**
654
* Returns the field mapping configuration.
655
*
656
* @return field mapping as Map
657
*/
658
public Map<String, Object> getMapping();
659
660
/**
661
* Returns the field name.
662
*
663
* @return field name
664
*/
665
public String getName();
666
667
/**
668
* Returns the Elasticsearch field type.
669
*
670
* @return field type
671
*/
672
public String getType();
673
}
674
```
675
676
## Usage Examples
677
678
### Basic Document Generation
679
680
```java
681
import org.elasticsearch.logsdb.datageneration.DocumentGenerator;
682
import org.elasticsearch.logsdb.datageneration.Template;
683
684
public class DataGenerationTest extends ESTestCase {
685
686
public void testBasicDocumentGeneration() throws IOException {
687
DocumentGenerator generator = new DocumentGenerator();
688
689
// Generate a single random document
690
BytesReference document = generator.generate();
691
assertThat(document, notNullValue());
692
693
// Parse and validate structure
694
XContentParser parser = createParser(XContentType.JSON, document);
695
Map<String, Object> source = parser.map();
696
assertThat(source.size(), greaterThan(0));
697
}
698
699
public void testTemplateBasedGeneration() throws IOException {
700
Template template = Template.builder()
701
.addTextField("title", new TextFieldGenerator.TextConfig(
702
10, 50, TextFieldGenerator.TextGenerationStrategy.SENTENCES, Locale.ENGLISH))
703
.addKeywordField("category", "news", "sports", "technology", "entertainment")
704
.addNumericField("views", NumericFieldGenerator.NumericType.LONG, 0L, 10000L)
705
.addDateField("published", "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
706
Instant.now().minus(365, ChronoUnit.DAYS), Instant.now())
707
.addBooleanField("featured", 0.2) // 20% chance of being featured
708
.build();
709
710
DocumentGenerator generator = new DocumentGenerator();
711
712
// Generate multiple documents with consistent schema
713
for (int i = 0; i < 10; i++) {
714
BytesReference doc = template.generate(generator);
715
716
XContentParser parser = createParser(XContentType.JSON, doc);
717
Map<String, Object> source = parser.map();
718
719
// Verify expected fields are present
720
assertTrue("Should have title", source.containsKey("title"));
721
assertTrue("Should have category", source.containsKey("category"));
722
assertTrue("Should have views", source.containsKey("views"));
723
724
// Verify field types and constraints
725
assertThat(source.get("title"), instanceOf(String.class));
726
assertThat(source.get("category"), anyOf(
727
equalTo("news"), equalTo("sports"),
728
equalTo("technology"), equalTo("entertainment")));
729
730
Long views = (Long) source.get("views");
731
assertThat(views, allOf(greaterThanOrEqualTo(0L), lessThanOrEqualTo(10000L)));
732
}
733
}
734
735
public void testPredefinedFieldGeneration() throws IOException {
736
List<PredefinedField> fields = Arrays.asList(
737
PredefinedField.text("message").withStrategy(LOG_MESSAGES),
738
PredefinedField.keyword("level", "DEBUG", "INFO", "WARN", "ERROR"),
739
PredefinedField.longField("timestamp",
740
Instant.now().minus(7, ChronoUnit.DAYS).toEpochMilli(),
741
Instant.now().toEpochMilli()),
742
PredefinedField.ipField("client_ip", 4),
743
PredefinedField.geoPoint("location", new Rectangle(-180, 180, -90, 90))
744
);
745
746
DocumentGenerator generator = new DocumentGenerator()
747
.withPredefinedFields(fields)
748
.withSeed(12345); // Reproducible generation
749
750
// Generate log-like documents
751
List<BytesReference> documents = generator.generateBatch(100);
752
753
assertThat(documents.size(), equalTo(100));
754
755
// Verify all documents have expected structure
756
for (BytesReference doc : documents) {
757
XContentParser parser = createParser(XContentType.JSON, doc);
758
Map<String, Object> source = parser.map();
759
760
assertThat(source, hasKey("message"));
761
assertThat(source, hasKey("level"));
762
assertThat(source, hasKey("timestamp"));
763
assertThat(source, hasKey("client_ip"));
764
assertThat(source, hasKey("location"));
765
}
766
}
767
}
768
```
769
770
### Integration with Index Creation
771
772
```java
773
public class DocumentGenerationIntegrationTest extends ESIntegTestCase {
774
775
public void testGeneratedDataIndexing() throws IOException {
776
// Create index with mapping for generated data
777
String mapping = """
778
{
779
"properties": {
780
"title": {"type": "text", "analyzer": "english"},
781
"category": {"type": "keyword"},
782
"views": {"type": "long"},
783
"published": {"type": "date"},
784
"featured": {"type": "boolean"},
785
"tags": {"type": "keyword"}
786
}
787
}""";
788
789
createIndex("test-index", Settings.EMPTY, mapping);
790
791
// Generate and index documents
792
Template template = Template.builder()
793
.addTextField("title", new TextFieldGenerator.TextConfig(
794
20, 100, TextFieldGenerator.TextGenerationStrategy.SENTENCES, Locale.ENGLISH))
795
.addKeywordField("category", "technology", "science", "business")
796
.addNumericField("views", NumericFieldGenerator.NumericType.LONG, 1L, 100000L)
797
.addDateField("published", "yyyy-MM-dd",
798
Instant.parse("2023-01-01T00:00:00Z"), Instant.now())
799
.addBooleanField("featured", 0.15)
800
.addArrayField("tags",
801
Template.builder().addKeywordField("tag",
802
"ai", "machine-learning", "elasticsearch", "search", "analytics").build(),
803
1, 5)
804
.build();
805
806
DocumentGenerator generator = new DocumentGenerator();
807
808
// Index generated documents
809
BulkRequestBuilder bulk = client().prepareBulk();
810
for (int i = 0; i < 1000; i++) {
811
Map<String, Object> source = generator.generateSource();
812
bulk.add(client().prepareIndex("test-index")
813
.setId(String.valueOf(i))
814
.setSource(source));
815
}
816
817
BulkResponse bulkResponse = bulk.get();
818
assertNoFailures(bulkResponse);
819
820
refresh("test-index");
821
822
// Verify generated data is searchable
823
SearchResponse response = client().prepareSearch("test-index")
824
.setQuery(QueryBuilders.matchQuery("title", "test"))
825
.setSize(0)
826
.get();
827
828
assertThat(response.getHits().getTotalHits().value, greaterThan(0L));
829
830
// Verify aggregations work on generated data
831
response = client().prepareSearch("test-index")
832
.setQuery(QueryBuilders.matchAllQuery())
833
.addAggregation(AggregationBuilders.terms("categories").field("category"))
834
.addAggregation(AggregationBuilders.stats("view_stats").field("views"))
835
.get();
836
837
Terms categories = response.getAggregations().get("categories");
838
assertThat(categories.getBuckets().size(), equalTo(3));
839
840
Stats viewStats = response.getAggregations().get("view_stats");
841
assertThat(viewStats.getCount(), equalTo(1000L));
842
assertThat(viewStats.getMin(), greaterThanOrEqualTo(1.0));
843
assertThat(viewStats.getMax(), lessThanOrEqualTo(100000.0));
844
}
845
}
846
```
847
848
## Best Practices
849
850
### Data Realism
851
- Use realistic value distributions rather than purely random data
852
- Include edge cases and boundary values in generated data
853
- Model real-world data patterns and relationships
854
855
### Performance Optimization
856
- Use seeds for reproducible test data when debugging
857
- Generate data in batches for better performance
858
- Cache expensive generators when possible
859
860
### Schema Design
861
- Define clear templates for consistent document structure
862
- Use predefined fields for common data types
863
- Include proper field mapping configurations
864
865
### Testing Coverage
866
- Generate data that exercises all mapping types used in production
867
- Include null values and missing fields in test data
868
- Test with various document sizes and complexity levels
869
870
The data generation utilities provide powerful capabilities for creating realistic, varied test datasets that enable comprehensive testing of Elasticsearch functionality with production-like data patterns.