Test framework library for Elasticsearch providing comprehensive testing utilities, base test classes, cluster management, and assertion helpers for unit and integration testing of Elasticsearch plugins and applications.
—
The Elasticsearch test framework provides comprehensive data generation utilities for creating realistic test documents, field data, and synthetic datasets. These utilities enable testing with varied, randomized data that closely mimics production scenarios.
Core utility for generating synthetic documents with configurable schemas and data patterns.
package org.elasticsearch.logsdb.datageneration;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentType;
/**
* Generates synthetic documents for testing with configurable field types,
* data distributions, and document structures. Supports complex nested
* objects and realistic data patterns.
*/
public class DocumentGenerator {
/**
* Configuration for document generation behavior. Every setting is a public
* final field whose value is passed to the constructor.
*/
public static class Config {
/** Maximum nesting depth for objects */
public final int maxObjectDepth;
/** Maximum number of fields per object */
public final int maxFields;
/** Probability of generating null values (NOTE(review): presumably 0.0 to 1.0 — confirm) */
public final double nullValueProbability;
/** Field name generation strategy */
public final FieldNamingStrategy namingStrategy;
/**
* Creates a generation configuration.
*
* @param maxObjectDepth maximum nesting depth for objects
* @param maxFields maximum number of fields per object
* @param nullValueProbability probability of generating null values
* @param namingStrategy field name generation strategy
*/
public Config(int maxObjectDepth,
int maxFields,
double nullValueProbability,
FieldNamingStrategy namingStrategy);
}
/**
* Creates a DocumentGenerator with the specified configuration.
*
* @param config generation configuration (object depth, field count, null
* probability and field naming strategy)
*/
public DocumentGenerator(Config config);
/**
* Creates a DocumentGenerator with default configuration.
* NOTE(review): the default values are not shown in this listing — confirm
* them before relying on a particular depth or field count.
*/
public DocumentGenerator();
/**
* Generates a single document with random structure and content.
* NOTE(review): the content type produced by this overload is not stated
* here; use {@link #generate(XContentType)} when a specific format is needed.
*
* @return generated document as XContent bytes
* @throws IOException on serialization failure
*/
public BytesReference generate() throws IOException;
/**
* Generates a document with the specified XContent type.
*
* @param contentType XContent format to use
* @return generated document as XContent bytes
* @throws IOException on serialization failure
*/
public BytesReference generate(XContentType contentType) throws IOException;
/**
* Generates a document following the provided template structure.
*
* @param template document template defining structure
* @return generated document matching template
* @throws IOException on serialization failure
*/
public BytesReference generateFromTemplate(Template template) throws IOException;
/**
* Generates multiple documents with consistent schema but varied content.
*
* @param count number of documents to generate
* @return list of generated documents; the usage example expects it to hold
* exactly {@code count} entries
* @throws IOException on generation failure
*/
public List<BytesReference> generateBatch(int count) throws IOException;
/**
* Generates a document and writes it directly to an XContentBuilder.
*
* @param builder XContent builder to write to
* @throws IOException on write failure
*/
public void generateInto(XContentBuilder builder) throws IOException;
/**
* Generates document source suitable for indexing operations.
*
* @return document source as a Map of field name to value
* @throws IOException on generation failure
*/
public Map<String, Object> generateSource() throws IOException;
/**
* Sets a seed for reproducible document generation.
*
* @param seed random seed for reproducible generation
* @return this DocumentGenerator for fluent configuration
*/
public DocumentGenerator withSeed(long seed);
/**
* Configures the generator to use predefined field configurations.
*
* @param fields predefined field configurations
* @return this DocumentGenerator for fluent configuration
*/
public DocumentGenerator withPredefinedFields(List<PredefinedField> fields);
/**
* Configures dynamic mapping behavior for generated documents.
*
* @param dynamicMapping dynamic mapping configuration
* @return this DocumentGenerator for fluent configuration
*/
public DocumentGenerator withDynamicMapping(DynamicMapping dynamicMapping);
}
Document template system for defining consistent document structures.
package org.elasticsearch.logsdb.datageneration;
import java.util.Map;
import java.util.function.Supplier;
/**
* Defines a template for document generation with specified field types,
* constraints, and generation rules. Enables consistent document structure
* across multiple generated documents.
*/
public class Template {
/**
* Builder for creating document templates with fluent configuration.
* Each {@code add...} method registers one named field and returns the
* builder so calls can be chained; {@link #build()} produces the template.
*/
public static class Builder {
/**
* Adds a text field to the template.
* NOTE(review): the usage examples pass a
* {@code TextFieldGenerator.TextConfig} here — confirm how that type
* relates to {@code TextFieldConfig}.
*
* @param fieldName name of the field
* @param config text field configuration
* @return builder for fluent configuration
*/
public Builder addTextField(String fieldName, TextFieldConfig config);
/**
* Adds a keyword field to the template.
*
* @param fieldName name of the field
* @param values possible keyword values
* @return builder for fluent configuration
*/
public Builder addKeywordField(String fieldName, String... values);
/**
* Adds a numeric field to the template.
*
* @param fieldName name of the field
* @param type numeric type of the generated values, for example
* {@code NumericType.LONG}
* @param min minimum value (inclusive)
* @param max maximum value (inclusive)
* @return builder for fluent configuration
*/
public Builder addNumericField(String fieldName,
NumericType type,
Number min,
Number max);
/**
* Adds a date field to the template.
*
* @param fieldName name of the field
* @param format date format pattern
* @param startDate earliest possible date
* @param endDate latest possible date
* @return builder for fluent configuration
*/
public Builder addDateField(String fieldName,
String format,
Instant startDate,
Instant endDate);
/**
* Adds a boolean field to the template.
*
* @param fieldName name of the field
* @param trueProbability probability of true values (0.0 to 1.0)
* @return builder for fluent configuration
*/
public Builder addBooleanField(String fieldName, double trueProbability);
/**
* Adds a nested object field to the template.
*
* @param fieldName name of the field
* @param nestedTemplate template for nested object structure
* @return builder for fluent configuration
*/
public Builder addObjectField(String fieldName, Template nestedTemplate);
/**
* Adds an array field to the template.
*
* @param fieldName name of the field
* @param elementTemplate template for array elements
* @param minSize minimum array size
* @param maxSize maximum array size
* @return builder for fluent configuration
*/
public Builder addArrayField(String fieldName,
Template elementTemplate,
int minSize,
int maxSize);
/**
* Adds a custom field with a value generator function.
*
* @param fieldName name of the field
* @param generator function to generate field values
* @return builder for fluent configuration
*/
public Builder addCustomField(String fieldName, Supplier<Object> generator);
/**
* Sets the probability that fields will be present in generated documents.
*
* @param probability field presence probability (0.0 to 1.0)
* @return builder for fluent configuration
*/
public Builder withFieldPresenceProbability(double probability);
/**
* Builds the template from the configured fields.
*
* @return configured Template instance
*/
public Template build();
}
/**
* Creates a new template builder. This is the entry point used by the usage
* examples: {@code Template.builder()...build()}.
*
* @return Template.Builder for fluent configuration
*/
public static Builder builder();
/**
* Generates a document following this template.
*
* @param generator document generator to use
* @return generated document
* @throws IOException on generation failure
*/
public BytesReference generate(DocumentGenerator generator) throws IOException;
/**
* Returns the field configurations defined in this template.
*
* @return map of field name to field configuration
*/
public Map<String, FieldConfig> getFields();
/**
* Validates that the template configuration is valid.
* NOTE(review): the specific checks performed are not visible here.
*
* @throws IllegalArgumentException if template is invalid
*/
public void validate();
}
Base class for generating data for specific field types.
package org.elasticsearch.logsdb.datageneration;
/**
* Abstract base class for generating data for specific Elasticsearch field types.
* Provides extensible framework for custom field data generation strategies.
*/
public abstract class FieldDataGenerator {
/**
* Configuration for field data generation. Subclasses shown in this file
* cast it to their own configuration type inside {@code generateValue()}.
*/
protected final FieldConfig config;
/**
* Creates a field data generator with the specified configuration.
*
* @param config field generation configuration
*/
protected FieldDataGenerator(FieldConfig config);
/**
* Generates a single field value.
*
* @return generated field value
*/
public abstract Object generateValue();
/**
* Generates multiple field values for array fields.
*
* @param count number of values to generate
* @return list of generated values
*/
public List<Object> generateValues(int count);
/**
* Indicates whether this field type supports null values.
*
* @return true if null values are supported
*/
public abstract boolean supportsNulls();
/**
* Generates a null value if supported by this field type.
*
* @return null or appropriate null representation
*/
public Object generateNull();
/**
* Creates a field data generator for the specified Elasticsearch field type.
*
* @param fieldType Elasticsearch field type name
* @param config field configuration
* @return appropriate FieldDataGenerator implementation
*/
public static FieldDataGenerator forFieldType(String fieldType, FieldConfig config);
}
package org.elasticsearch.logsdb.datageneration.fields;
/**
 * Produces values for text fields by delegating to a helper chosen from the
 * {@link TextGenerationStrategy} held in this generator's {@link TextConfig}.
 */
public class TextFieldGenerator extends FieldDataGenerator {
    /**
     * Settings that control how text values are produced.
     */
    public static class TextConfig extends FieldConfig {
        /** Lower bound on the generated text length */
        public final int minLength;
        /** Upper bound on the generated text length */
        public final int maxLength;
        /** Strategy used to produce the text */
        public final TextGenerationStrategy strategy;
        /** Locale of the generated text */
        public final Locale locale;

        public TextConfig(int minLength,
                          int maxLength,
                          TextGenerationStrategy strategy,
                          Locale locale);
    }

    /**
     * The kinds of text this generator can produce.
     */
    public enum TextGenerationStrategy {
        /** Random alphanumeric strings */
        ALPHANUMERIC,
        /** Lorem ipsum style text */
        LOREM_IPSUM,
        /** Realistic sentences with grammar */
        SENTENCES,
        /** Log message patterns */
        LOG_MESSAGES,
        /** Email addresses */
        EMAIL_ADDRESSES,
        /** URLs */
        URLS
    }

    /**
     * Generates one text value using the configured strategy.
     *
     * @return the value produced by the helper that matches the strategy
     * @throws IllegalArgumentException if the strategy is not one of the handled constants
     */
    @Override
    public Object generateValue() {
        final TextConfig textConfig = (TextConfig) this.config;
        final int shortest = textConfig.minLength;
        final int longest = textConfig.maxLength;
        return switch (textConfig.strategy) {
            // Only the first three strategies take length bounds.
            case ALPHANUMERIC -> generateAlphanumeric(shortest, longest);
            case LOREM_IPSUM -> generateLoremIpsum(shortest, longest);
            case SENTENCES -> generateSentences(shortest, longest, textConfig.locale);
            case LOG_MESSAGES -> generateLogMessage();
            case EMAIL_ADDRESSES -> generateEmailAddress();
            case URLS -> generateUrl();
            default -> throw new IllegalArgumentException("Unknown strategy: " + textConfig.strategy);
        };
    }
}
/**
 * Produces keyword values, normally drawn from a configured vocabulary and
 * occasionally replaced by a synthetic "custom_" value when that is allowed.
 */
public class KeywordFieldGenerator extends FieldDataGenerator {
    /**
     * Settings that control how keyword values are chosen.
     */
    public static class KeywordConfig extends FieldConfig {
        /** The values a keyword may be drawn from */
        public final List<String> vocabulary;
        /** Whether values outside the vocabulary may be produced */
        public final boolean allowCustomValues;
        /** How values are picked from the vocabulary */
        public final DistributionStrategy distribution;

        public KeywordConfig(List<String> vocabulary,
                             boolean allowCustomValues,
                             DistributionStrategy distribution);
    }

    /**
     * Ways of picking a value from the vocabulary.
     */
    public enum DistributionStrategy {
        /** Uniform random selection */
        UNIFORM,
        /** Zipfian distribution (some values more frequent) */
        ZIPFIAN,
        /** Normal distribution around median values */
        NORMAL
    }

    /**
     * Generates one keyword value.
     *
     * @return a "custom_"-prefixed random value or a vocabulary entry
     */
    @Override
    public Object generateValue() {
        final KeywordConfig keywordConfig = (KeywordConfig) this.config;
        // The random draw happens only when custom values are enabled (short-circuit);
        // it then succeeds for draws below 0.1, i.e. roughly 10% of the time.
        final boolean emitCustomValue = keywordConfig.allowCustomValues && random().nextDouble() < 0.1;
        return emitCustomValue
            ? "custom_" + randomAlphaOfLength(8)
            : selectFromVocabulary(keywordConfig.vocabulary, keywordConfig.distribution);
    }
}
/**
* Generator for numeric field data with realistic distributions.
*/
public class NumericFieldGenerator extends FieldDataGenerator {
/**
* Configuration for numeric field generation. {@code generateValue()} reads
* the bounds as doubles and converts the result to {@link #type}.
*/
public static class NumericConfig extends FieldConfig {
/** Numeric field type */
public final NumericType type;
/** Minimum value (inclusive) */
public final Number minValue;
/** Maximum value (inclusive) */
public final Number maxValue;
/** Distribution type for value generation */
public final NumericDistribution distribution;
/**
* Creates a numeric field configuration.
*
* @param type numeric field type of the generated values
* @param minValue minimum value (inclusive)
* @param maxValue maximum value (inclusive)
* @param distribution distribution type for value generation
*/
public NumericConfig(NumericType type,
Number minValue,
Number maxValue,
NumericDistribution distribution);
}
/**
* Numeric field types supported by Elasticsearch. This is the target type
* that {@code generateValue()} converts its raw double value to.
*/
public enum NumericType {
BYTE, SHORT, INTEGER, LONG, FLOAT, DOUBLE, SCALED_FLOAT
}
/**
* Distribution patterns for numeric value generation, applied between the
* configured minimum and maximum values.
*/
public enum NumericDistribution {
/** Uniform random distribution */
UNIFORM,
/** Normal (Gaussian) distribution */
NORMAL,
/** Exponential distribution */
EXPONENTIAL,
/** Log-normal distribution */
LOG_NORMAL
}
@Override
public Object generateValue() {
    // Draw a raw double between the configured bounds, then narrow it to the field's numeric type.
    final NumericConfig numericConfig = (NumericConfig) this.config;
    final double lowerBound = numericConfig.minValue.doubleValue();
    final double upperBound = numericConfig.maxValue.doubleValue();
    final double drawn = generateDistributedValue(numericConfig.distribution, lowerBound, upperBound);
    return convertToTargetType(drawn, numericConfig.type);
}
}
Configuration for predefined field types with specific generation rules.
package org.elasticsearch.logsdb.datageneration;
/**
* Represents a predefined field configuration with specific generation rules
* and constraints. Used to create consistent field behavior across documents.
*/
public class PredefinedField {
/** Field name */
private final String name;
/** Field type (text, keyword, long, etc.) */
private final String type;
/** Field-specific configuration */
private final FieldConfig config;
/** Mapping properties for this field */
private final Map<String, Object> mapping;
/**
* Creates a predefined field configuration. This is the general-purpose
* constructor; the static factories below cover common field types.
*
* @param name field name
* @param type Elasticsearch field type
* @param config field generation configuration
* @param mapping field mapping properties
*/
public PredefinedField(String name,
String type,
FieldConfig config,
Map<String, Object> mapping);
/**
* Creates a text field with default configuration.
* NOTE(review): the default lengths and generation strategy are not shown here.
*
* @param name field name
* @return configured PredefinedField for text
*/
public static PredefinedField text(String name);
/**
* Creates a text field with custom configuration.
*
* @param name field name
* @param analyzer analyzer to use
* @param minLength minimum text length
* @param maxLength maximum text length
* @return configured PredefinedField for text
*/
public static PredefinedField text(String name,
String analyzer,
int minLength,
int maxLength);
/**
* Creates a keyword field with specified vocabulary.
*
* @param name field name
* @param vocabulary possible keyword values
* @return configured PredefinedField for keyword
*/
public static PredefinedField keyword(String name, String... vocabulary);
/**
* Creates a long field with specified range.
*
* @param name field name
* @param min minimum value
* @param max maximum value
* @return configured PredefinedField for long
*/
public static PredefinedField longField(String name, long min, long max);
/**
* Creates a date field with specified format and range.
*
* @param name field name
* @param format date format pattern
* @param start earliest date
* @param end latest date
* @return configured PredefinedField for date
*/
public static PredefinedField dateField(String name,
String format,
Instant start,
Instant end);
/**
* Creates a boolean field with specified true probability.
*
* @param name field name
* @param trueProbability probability of true values (0.0 to 1.0)
* @return configured PredefinedField for boolean
*/
public static PredefinedField booleanField(String name, double trueProbability);
/**
* Creates a geo_point field with specified bounds.
*
* @param name field name
* @param bounds geographic bounding box
* @return configured PredefinedField for geo_point
*/
public static PredefinedField geoPoint(String name, Rectangle bounds);
/**
* Creates an IP address field.
*
* @param name field name
* @param ipVersion IP version (4 or 6)
* @return configured PredefinedField for ip
*/
public static PredefinedField ipField(String name, int ipVersion);
/**
* Generates a value for this field using the configured generator.
*
* @return generated field value
*/
public Object generateValue();
/**
* Returns the field mapping configuration.
*
* @return field mapping as Map
*/
public Map<String, Object> getMapping();
/**
* Returns the field name.
*
* @return field name
*/
public String getName();
/**
* Returns the Elasticsearch field type.
*
* @return field type, such as text, keyword or long
*/
public String getType();
}
import org.elasticsearch.logsdb.datageneration.DocumentGenerator;
import org.elasticsearch.logsdb.datageneration.Template;
public class DataGenerationTest extends ESTestCase {
/**
 * Generates one random document with a default generator and checks that it
 * parses as a non-empty JSON object.
 *
 * @throws IOException if generation or parsing fails
 */
public void testBasicDocumentGeneration() throws IOException {
    DocumentGenerator generator = new DocumentGenerator();
    // Ask for JSON explicitly so the bytes match the parser created below.
    BytesReference document = generator.generate(XContentType.JSON);
    assertThat(document, notNullValue());
    // createParser takes an XContent (not an XContentType); the parser is closeable,
    // so try-with-resources releases it even when an assertion fails.
    try (XContentParser parser = createParser(XContentType.JSON.xContent(), document)) {
        Map<String, Object> source = parser.map();
        assertThat(source.size(), greaterThan(0));
    }
}
/**
 * Builds a five-field template and checks that ten generated documents contain
 * the expected fields with values inside the configured constraints.
 *
 * @throws IOException if generation or parsing fails
 */
public void testTemplateBasedGeneration() throws IOException {
    // Capture "now" once so both ends of the date range share the same reference instant.
    Instant now = Instant.now();
    Template template = Template.builder()
        .addTextField("title", new TextFieldGenerator.TextConfig(
            10, 50, TextFieldGenerator.TextGenerationStrategy.SENTENCES, Locale.ENGLISH))
        .addKeywordField("category", "news", "sports", "technology", "entertainment")
        .addNumericField("views", NumericFieldGenerator.NumericType.LONG, 0L, 10000L)
        .addDateField("published", "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
            now.minus(365, ChronoUnit.DAYS), now)
        .addBooleanField("featured", 0.2) // 20% chance of being featured
        .build();
    DocumentGenerator generator = new DocumentGenerator();
    // Generate multiple documents with consistent schema
    for (int i = 0; i < 10; i++) {
        BytesReference doc = template.generate(generator);
        // createParser takes an XContent (not an XContentType); close the parser per document.
        try (XContentParser parser = createParser(XContentType.JSON.xContent(), doc)) {
            Map<String, Object> source = parser.map();
            // Verify expected fields are present
            assertTrue("Should have title", source.containsKey("title"));
            assertTrue("Should have category", source.containsKey("category"));
            assertTrue("Should have views", source.containsKey("views"));
            // Verify field types and constraints
            assertThat(source.get("title"), instanceOf(String.class));
            assertThat(source.get("category"), anyOf(
                equalTo("news"), equalTo("sports"),
                equalTo("technology"), equalTo("entertainment")));
            // Parsed JSON numbers that fit in an int come back as Integer, so a direct
            // (Long) cast would throw ClassCastException; widen through Number instead.
            long views = ((Number) source.get("views")).longValue();
            assertThat(views, allOf(greaterThanOrEqualTo(0L), lessThanOrEqualTo(10000L)));
        }
    }
}
/**
 * Configures a seeded generator with log-style predefined fields and checks
 * that every document in a batch of 100 contains all of them.
 *
 * @throws IOException if generation or parsing fails
 */
public void testPredefinedFieldGeneration() throws IOException {
    // Capture "now" once so the timestamp range has a single reference instant.
    Instant now = Instant.now();
    List<PredefinedField> fields = Arrays.asList(
        // PredefinedField declares no withStrategy(...) method, so the log-message
        // strategy is supplied through the public four-argument constructor.
        new PredefinedField(
            "message",
            "text",
            new TextFieldGenerator.TextConfig(
                10, 200, TextFieldGenerator.TextGenerationStrategy.LOG_MESSAGES, Locale.ENGLISH),
            Map.of("type", "text")),
        PredefinedField.keyword("level", "DEBUG", "INFO", "WARN", "ERROR"),
        PredefinedField.longField("timestamp",
            now.minus(7, ChronoUnit.DAYS).toEpochMilli(),
            now.toEpochMilli()),
        PredefinedField.ipField("client_ip", 4),
        // Assuming org.elasticsearch.geometry.Rectangle: its constructor order is
        // (minX, maxX, maxY, minY), so the latitude bounds are passed as 90 then -90.
        PredefinedField.geoPoint("location", new Rectangle(-180, 180, 90, -90))
    );
    DocumentGenerator generator = new DocumentGenerator()
        .withPredefinedFields(fields)
        .withSeed(12345); // Reproducible generation
    // Generate log-like documents
    List<BytesReference> documents = generator.generateBatch(100);
    assertThat(documents.size(), equalTo(100));
    // Verify all documents have expected structure
    for (BytesReference doc : documents) {
        // createParser takes an XContent (not an XContentType); close the parser per document.
        try (XContentParser parser = createParser(XContentType.JSON.xContent(), doc)) {
            Map<String, Object> source = parser.map();
            assertThat(source, hasKey("message"));
            assertThat(source, hasKey("level"));
            assertThat(source, hasKey("timestamp"));
            assertThat(source, hasKey("client_ip"));
            assertThat(source, hasKey("location"));
        }
    }
}
}
public class DocumentGenerationIntegrationTest extends ESIntegTestCase {
/**
 * Indexes 1000 template-generated documents into a mapped index and checks
 * that they can be searched and aggregated.
 *
 * @throws IOException if document generation fails
 */
public void testGeneratedDataIndexing() throws IOException {
    // Create index with mapping for generated data. "tags" is mapped as an object with a
    // keyword sub-field because its array elements are built from a template with one "tag" field.
    String mapping = """
        {
          "properties": {
            "title": {"type": "text", "analyzer": "english"},
            "category": {"type": "keyword"},
            "views": {"type": "long"},
            "published": {"type": "date"},
            "featured": {"type": "boolean"},
            "tags": {"properties": {"tag": {"type": "keyword"}}}
          }
        }""";
    // ESIntegTestCase has no createIndex(name, settings, mapping) overload; use the request builder.
    assertAcked(prepareCreate("test-index").setMapping(mapping));
    // Generate and index documents
    Template template = Template.builder()
        .addTextField("title", new TextFieldGenerator.TextConfig(
            20, 100, TextFieldGenerator.TextGenerationStrategy.SENTENCES, Locale.ENGLISH))
        .addKeywordField("category", "technology", "science", "business")
        .addNumericField("views", NumericFieldGenerator.NumericType.LONG, 1L, 100000L)
        .addDateField("published", "yyyy-MM-dd",
            Instant.parse("2023-01-01T00:00:00Z"), Instant.now())
        .addBooleanField("featured", 0.15)
        .addArrayField("tags",
            Template.builder().addKeywordField("tag",
                "ai", "machine-learning", "elasticsearch", "search", "analytics").build(),
            1, 5)
        // Every document must carry every field, otherwise the count assertions below can fail.
        .withFieldPresenceProbability(1.0)
        .build();
    DocumentGenerator generator = new DocumentGenerator();
    // Index generated documents
    BulkRequestBuilder bulk = client().prepareBulk();
    for (int i = 0; i < 1000; i++) {
        // Generate from the template so the documents match the mapping and the
        // assertions; generateSource() would produce unrelated random documents.
        BytesReference source = template.generate(generator);
        bulk.add(client().prepareIndex("test-index")
            .setId(String.valueOf(i))
            .setSource(source, XContentType.JSON));
    }
    BulkResponse bulkResponse = bulk.get();
    assertNoFailures(bulkResponse);
    refresh("test-index");
    // Verify generated data is searchable. An exists query is used because randomly
    // generated sentences are not guaranteed to contain any particular word.
    SearchResponse response = client().prepareSearch("test-index")
        .setQuery(QueryBuilders.existsQuery("title"))
        .setSize(0)
        .get();
    assertThat(response.getHits().getTotalHits().value, greaterThan(0L));
    // Verify aggregations work on generated data
    response = client().prepareSearch("test-index")
        .setQuery(QueryBuilders.matchAllQuery())
        .addAggregation(AggregationBuilders.terms("categories").field("category"))
        .addAggregation(AggregationBuilders.stats("view_stats").field("views"))
        .get();
    Terms categories = response.getAggregations().get("categories");
    assertThat(categories.getBuckets().size(), equalTo(3));
    Stats viewStats = response.getAggregations().get("view_stats");
    assertThat(viewStats.getCount(), equalTo(1000L));
    assertThat(viewStats.getMin(), greaterThanOrEqualTo(1.0));
    assertThat(viewStats.getMax(), lessThanOrEqualTo(100000.0));
}
}
The data generation utilities provide powerful capabilities for creating realistic, varied test datasets that enable comprehensive testing of Elasticsearch functionality with production-like data patterns.
Install with Tessl CLI
npx tessl i tessl/maven-org-elasticsearch--elasticsearch-test-framework