The KVStore indexing system provides efficient data access and sorting through field and method annotations. Indices enable fast queries without loading all data and support hierarchical relationships between entities.
The `@KVIndex` annotation tags fields or methods to be indexed when storing objects, enabling efficient sorting and querying.
/**
 * Tags a field or method to be indexed when storing an object.
 *
 * <p>Indexed values may be String, integral types (byte, short, int, long, boolean),
 * or arrays.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.METHOD})
public @interface KVIndex {

  /** Name of the natural index; also the default value of {@link #value()}. */
  String NATURAL_INDEX_NAME = "__main__";

  /**
   * The name of the index to be created for the annotated entity.
   * Must be unique within the class and must not start with an underscore (reserved).
   *
   * @return the index name (default: {@link #NATURAL_INDEX_NAME})
   */
  String value() default NATURAL_INDEX_NAME;

  /**
   * The name of the parent index of this index.
   * Creates hierarchical relationships between entities.
   *
   * @return the parent index name (default: empty string, meaning no parent)
   */
  String parent() default "";

  /**
   * Whether to copy the instance's data to the index instead of storing a pointer to it.
   * The default pointer mode saves disk space but is slower to read because of the extra
   * indirection; copy mode reads faster at the cost of more disk space.
   *
   * @return true to copy data, false to store a pointer (default: false)
   */
  boolean copy() default false;
}
KVTypeInfo provides introspection and access to indexed fields for stored objects.
/**
 * Describes a type managed in KVStore and gives access to its indexed fields.
 * The type is introspected for {@code @KVIndex} annotations on fields and methods.
 *
 * <p>API summary only: method bodies are omitted.
 */
public class KVTypeInfo {

  /**
   * Builds the type information by introspecting {@code type} for indices.
   *
   * @param type the class to introspect
   */
  public KVTypeInfo(Class<?> type);

  /**
   * @return the class type being managed
   */
  public Class<?> type();

  /**
   * Reads, via reflection, the value that {@code instance} holds for the named index.
   *
   * @param indexName the name of the index
   * @param instance  the object instance to extract the value from
   * @return the index value for the given instance
   * @throws Exception if reflection fails
   */
  public Object getIndexValue(String indexName, Object instance) throws Exception;

  /**
   * @return a stream of all {@code KVIndex} annotations for the type
   */
  public Stream<KVIndex> indices();
}
Usage Examples:
import org.apache.spark.util.kvstore.KVIndex;
/**
 * Example entity with a natural index and several secondary indices.
 */
public class Employee {
  // Natural index - unique identifier (required)
  @KVIndex
  public String employeeId;

  // Secondary indices for efficient querying
  @KVIndex("department")
  public String department;

  @KVIndex("salary")
  public long salary;

  // NOTE(review): LocalDate is not among the index types listed in the KVIndex Javadoc
  // (String, integral types, arrays) - confirm the target store accepts it as an index.
  @KVIndex("hireDate")
  public java.time.LocalDate hireDate;

  @KVIndex("active")
  public boolean isActive;

  /**
   * Creates an employee record.
   *
   * @param id       value for the natural index
   * @param dept     department name
   * @param salary   salary
   * @param hireDate hire date (fully qualified: this snippet imports only KVIndex)
   * @param active   whether the employee is active
   */
  public Employee(
      String id,
      String dept,
      long salary,
      java.time.LocalDate hireDate,
      boolean active) {
    this.employeeId = id;
    this.department = dept;
    this.salary = salary;
    this.hireDate = hireDate;
    this.isActive = active;
  }
}
// Store and query employees
KVStore store = new LevelDB(new File("/path/to/store"));

// Store employees
store.write(new Employee("E001", "Engineering", 75000, LocalDate.of(2020, 1, 15), true));
store.write(new Employee("E002", "Marketing", 65000, LocalDate.of(2019, 6, 10), true));
store.write(new Employee("E003", "Engineering", 80000, LocalDate.of(2021, 3, 20), false));

// Query by department: first and last share one value, i.e. an exact match
String engineering = "Engineering";
KVStoreView<Employee> engineers = store.view(Employee.class)
    .index("department")
    .first(engineering)
    .last(engineering);

// Query by salary range (70k-85k)
long minSalary = 70000L;
long maxSalary = 85000L;
KVStoreView<Employee> highEarners = store.view(Employee.class)
    .index("salary")
    .first(minSalary)
    .last(maxSalary);

// Query active employees
boolean active = true;
KVStoreView<Employee> activeEmployees = store.view(Employee.class)
    .index("active")
    .first(active)
    .last(active);

/**
 * Example entity whose secondary indices are computed by annotated methods.
 */
public class Task {
  @KVIndex
  private String taskId;
  private java.time.LocalDateTime createdAt;
  private java.time.LocalDateTime completedAt;

  // Index based on method result
  @KVIndex("dayOfYear")
  public int getCreationDayOfYear() {
    return createdAt.getDayOfYear();
  }

  // Index on minutes between creation and completion; -1 when completedAt is unset
  @KVIndex("duration")
  public long getDurationMinutes() {
    return completedAt == null
        ? -1
        : java.time.Duration.between(createdAt, completedAt).toMinutes();
  }

  // Getter for natural key
  public String getTaskId() {
    return taskId;
  }

  // Constructor and other methods...
}
// Query tasks by creation day.
// The clock is read once so both bounds use the same day even if midnight passes
// between the two calls. The index holds only the day of year, so this matches
// tasks created on this day of any year.
int today = java.time.LocalDateTime.now().getDayOfYear();
KVStoreView<Task> todaysTasks = store.view(Task.class)
    .index("dayOfYear")
    .first(today)
    .last(today);

/**
 * Example entity with child indices ("priority", "status") that declare
 * "project" as their parent index.
 */
public class ProjectTask {
  @KVIndex
  public String taskId;

  // Parent index for project grouping
  @KVIndex("project")
  public String projectId;

  // Child index that depends on project
  @KVIndex(value = "priority", parent = "project")
  public int priority;

  @KVIndex(value = "status", parent = "project")
  public String status;

  /**
   * @param taskId    value for the natural index
   * @param projectId value for the parent "project" index
   * @param priority  value for the "priority" child index
   * @param status    value for the "status" child index
   */
  public ProjectTask(String taskId, String projectId, int priority, String status) {
    this.taskId = taskId;
    this.projectId = projectId;
    this.priority = priority;
    this.status = status;
  }
}
// Query tasks within a specific project
String projectId = "PROJECT-123";
KVStoreView<ProjectTask> projectTasks = store.view(ProjectTask.class)
    .index("project")
    .first(projectId)
    .last(projectId);

// Query high-priority tasks within a project
// NOTE(review): first(8) combined with reverse() presumably iterates downward from 8;
// confirm against KVStoreView that this selects the intended "high-priority" range.
KVStoreView<ProjectTask> highPriorityTasks = store.view(ProjectTask.class)
    .index("priority")
    .parent(projectId) // Required for child index
    .first(8)
    .reverse();

// Query running tasks within a project
String running = "running";
KVStoreView<ProjectTask> runningTasks = store.view(ProjectTask.class)
    .index("status")
    .parent(projectId)
    .first(running)
    .last(running);

/**
 * Example entity contrasting pointer (default) and copy indices.
 */
public class Document {
  @KVIndex
  public String documentId;

  // Pointer index (default) - saves space, slower reads
  @KVIndex("category")
  public String category;

  // Copy index - faster reads, uses more space
  @KVIndex(value = "priority", copy = true)
  public int priority;

  // Large object - use pointer to save space
  @KVIndex("contentHash")
  public String contentHash;

  public byte[] content; // Large data not indexed

  /**
   * @param id       value for the natural index
   * @param category value for the "category" index
   * @param priority value for the "priority" copy index
   * @param hash     value for the "contentHash" index
   * @param content  payload, stored without an index
   */
  public Document(String id, String category, int priority, String hash, byte[] content) {
    this.documentId = id;
    this.category = category;
    this.priority = priority;
    this.contentHash = hash;
    this.content = content;
  }
}

/**
 * Example entity with array-valued indices.
 */
public class Product {
  @KVIndex
  public String productId;

  // Array index - each tag creates a separate index entry
  @KVIndex("tags")
  public String[] tags;

  @KVIndex("categories")
  public int[] categoryIds;

  /**
   * @param id         value for the natural index
   * @param tags       values for the "tags" index
   * @param categories values for the "categories" index
   */
  public Product(String id, String[] tags, int[] categories) {
    this.productId = id;
    this.tags = tags;
    this.categoryIds = categories;
  }
}
// Query products by tag: first and last share one value, i.e. an exact match
String tag = "electronics";
KVStoreView<Product> electronicsProducts = store.view(Product.class)
    .index("tags")
    .first(tag)
    .last(tag);
Primitive Types:
String - Text indexing with lexicographic ordering
byte, short, int, long - Numeric indexing with natural ordering
boolean - Boolean indexing (false < true)
Array Types:
String[] - Each array element creates separate index entries
byte[], short[], int[], long[] - Numeric array indexing
boolean[] - Boolean array indexing
Temporal Types:
java.time.LocalDate - Date indexing
java.time.LocalDateTime - Date-time indexing
java.util.Date - Legacy date indexing
(Caution: the KVIndex Javadoc above lists only String, integral types, and arrays as supported index types; verify temporal-type support against the store implementation in use before relying on it.)
// Index selection affects query performance
/**
 * Example entity illustrating index selection: a natural index, three pointer
 * indices (userId, status, timestamp), one copy index (priority), and two
 * fields deliberately left without an index.
 */
public class OptimizedEntity {
// Natural index (no name given, so the default index name applies)
@KVIndex
public String id;
// High-cardinality index - good for exact matches
@KVIndex("userId")
public String userId;
// Low-cardinality index - good for filtering
@KVIndex("status")
public String status; // "active", "inactive", "pending"
// Range queries benefit from ordered indices
@KVIndex("timestamp")
public long timestamp;
// Copy index for frequently accessed small data
@KVIndex(value = "priority", copy = true)
public int priority;
// Don't index large, rarely queried fields
public String description; // No @KVIndex annotation
public byte[] largeData; // No @KVIndex annotation
}
// Efficient query patterns

// 1. Exact match on high-cardinality index
String userId = "user-12345";
KVStoreView<OptimizedEntity> userEntities = store.view(OptimizedEntity.class)
    .index("userId")
    .first(userId)
    .last(userId);

// 2. Range query on ordered index: everything from 24 hours ago onward
long dayInMillis = 24L * 60 * 60 * 1000;
long since = System.currentTimeMillis() - dayInMillis;
KVStoreView<OptimizedEntity> recentEntities = store.view(OptimizedEntity.class)
    .index("timestamp")
    .first(since); // Last 24 hours

// 3. Filter by low-cardinality index
String activeStatus = "active";
KVStoreView<OptimizedEntity> activeEntities = store.view(OptimizedEntity.class)
    .index("status")
    .first(activeStatus)
    .last(activeStatus);