A local key/value store abstraction library for Apache Spark applications that provides automatic serialization, indexing, and key management features.
—
Automatic index creation and management using field and method annotations for efficient data access. The indexing system enables fast queries and sorting without loading all data into memory.
Marks fields or methods for indexing in the store, enabling efficient queries and sorted iteration.
/**
 * Tags a field or "getter" method to be indexed when storing an object.
 *
 * <p>Types are required to have a natural index that uniquely identifies instances in the store.
 * The default value of the annotation identifies the natural index for the type.
 *
 * <p>Indexes allow for more efficient sorting of data read from the store. By annotating a field
 * or "getter" method with this annotation, an index will be created that will provide sorting
 * based on the string value of that field.
 *
 * <p>Note that creating indices means more space will be needed, and maintenance operations like
 * updating or deleting a value will become more expensive.
 *
 * <p>Indices are restricted to {@code String}, integral types ({@code byte}, {@code short},
 * {@code int}, {@code long}), {@code boolean}, and arrays of those values.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.METHOD})
public @interface KVIndex {

  /**
   * Name of the natural index. Declared here so that the default of {@link #value()} resolves
   * within this type; the leading underscores place it in the name space reserved for internal
   * use.
   */
  String NATURAL_INDEX_NAME = "__main__";

  /**
   * The name of the index to be created for the annotated entity. Must be unique within
   * the class. Index names are not allowed to start with an underscore (that's reserved for
   * internal use). The default value is the natural index name (which is always a copy index
   * regardless of the annotation's values).
   *
   * @return the index name, or {@link #NATURAL_INDEX_NAME} when not specified
   */
  String value() default NATURAL_INDEX_NAME;

  /**
   * The name of the parent index of this index. By default there is no parent index, so the
   * generated data can be retrieved without having to provide a parent value.
   *
   * <p>If a parent index is defined, iterating over the data using the index will require
   * providing a single value for the parent index. This serves as a rudimentary way to provide
   * relationships between entities in the store.
   *
   * @return the parent index name, or the empty string when there is no parent
   */
  String parent() default "";

  /**
   * Whether to copy the instance's data to the index, instead of just storing a pointer to the
   * data. The default behavior is to just store a reference; that saves disk space but is slower
   * to read, since there's a level of indirection.
   *
   * @return {@code true} to store a full copy in the index, {@code false} to store a reference
   */
  boolean copy() default false;
}
/**
* Constant for the natural index name used internally by the KVStore system.
*
* It is the default of KVIndex.value(), so a bare @KVIndex annotation designates the natural
* index. The leading underscores keep it in the name space reserved for internal use; index
* names chosen by applications are not allowed to start with an underscore.
*/
String NATURAL_INDEX_NAME = "__main__";
Annotate fields to create indices for efficient querying and sorting.
Usage Examples:
/**
 * Example entity that declares its indices directly on fields.
 */
public class Person {

  // Natural index (required) - uniquely identifies the object
  @KVIndex
  public String id;

  // Create an index on the name field
  @KVIndex("name")
  public String name;

  // Create an index on the age field
  @KVIndex("age")
  public int age;

  // Create an index on the active field
  @KVIndex("active")
  public boolean active;

  /**
   * Creates a person with every indexed field populated.
   *
   * @param id value of the natural index
   * @param name value of the "name" index
   * @param age value of the "age" index
   * @param active value of the "active" index
   */
  public Person(String id, String name, int age, boolean active) {
    this.id = id;
    this.name = name;
    this.age = age;
    this.active = active;
  }
}
// Usage with the indexed class
KVStore store = new InMemoryStore();
// Write data: each Person carries its natural index value in the id field
store.write(new Person("p1", "Alice", 30, true));
store.write(new Person("p2", "Bob", 25, false));
store.write(new Person("p3", "Charlie", 35, true));
// Query by name index: iterate the "name" index, starting at "Alice" and stopping at "Bob"
for (Person p : store.view(Person.class).index("name").first("Alice").last("Bob")) {
System.out.println(p.name);
}
// Query by age index: iterate the "age" index, starting at 25 and stopping at 30
for (Person p : store.view(Person.class).index("age").first(25).last(30)) {
System.out.println(p.name + " is " + p.age);
}
Annotate getter methods to create indices based on computed values.
Usage Examples:
/**
 * Example entity whose secondary indices are computed by annotated getter methods
 * rather than read from fields.
 */
public class Employee {

  /** Natural index of the employee. */
  @KVIndex
  public String employeeId;

  public String firstName;
  public String lastName;
  public Date birthDate;

  /** Value of the "fullName" index: first name, a space, then last name. */
  @KVIndex("fullName")
  public String getFullName() {
    return String.join(" ", firstName, lastName);
  }

  /** Value of the "birthYear" index: the calendar year of {@code birthDate}. */
  @KVIndex("birthYear")
  public int getBirthYear() {
    // Date.getYear() returns year - 1900, so shift it back to the calendar year.
    final int yearsSince1900 = birthDate.getYear();
    return yearsSince1900 + 1900;
  }

  /** Value of the "displayName" index: last name, a comma and space, then first name. */
  @KVIndex("displayName")
  public String getDisplayName() {
    return String.join(", ", lastName, firstName);
  }
}
// Usage (relies on a KVStore named "store" that is already open)
Employee emp = new Employee();
emp.employeeId = "emp123";
emp.firstName = "John";
emp.lastName = "Doe";
emp.birthDate = new Date(85, 5, 15); // June 15, 1985 (year is given as year - 1900; month 5 is June)
store.write(emp);
// Query by computed fullName index, starting at "John Doe".
// NOTE(review): no last(...) bound is set, so iteration presumably continues through every
// later fullName as well; add .last("John Doe") if an exact match is intended - confirm.
for (Employee e : store.view(Employee.class).index("fullName").first("John Doe")) {
System.out.println("Found: " + e.getDisplayName());
}
// Query by computed birthYear index: iterate birth years starting at 1985 and stopping at 1990
for (Employee e : store.view(Employee.class).index("birthYear").first(1985).last(1990)) {
System.out.println(e.getFullName() + " born in " + e.getBirthYear());
}
Create parent-child index relationships for hierarchical data structures.
Usage Examples:
/**
 * Example entity with child indices ("priority", "status") that are scoped to a
 * parent index ("projectId").
 */
public class Task {

  /** Natural index of the task. */
  @KVIndex
  public String taskId;

  // Parent index
  @KVIndex("projectId")
  public String projectId;

  // Child index with parent
  @KVIndex(value = "priority", parent = "projectId")
  public String priority;

  // Another child index
  @KVIndex(value = "status", parent = "projectId")
  public String status;

  /** Not indexed. */
  public String title;

  /**
   * Creates a task with all fields populated.
   *
   * @param taskId value of the natural index
   * @param projectId value of the parent "projectId" index
   * @param priority value of the "priority" child index
   * @param status value of the "status" child index
   * @param title free-form title (not indexed)
   */
  public Task(String taskId, String projectId, String priority, String status, String title) {
    this.taskId = taskId;
    this.projectId = projectId;
    this.priority = priority;
    this.status = status;
    this.title = title;
  }
}
// Usage with hierarchical indices.
// The views below are only constructed; iterate them (for example with a for-each loop) to
// read the matching tasks.
KVStore store = new LevelDB(new File("./tasks"));
// Add tasks for different projects
store.write(new Task("t1", "proj1", "high", "open", "Implement feature A"));
store.write(new Task("t2", "proj1", "medium", "open", "Write tests"));
store.write(new Task("t3", "proj1", "high", "closed", "Fix bug #123"));
store.write(new Task("t4", "proj2", "low", "open", "Update documentation"));
// Query high priority tasks for project 1. "priority" declares "projectId" as its parent, so a
// single parent value is supplied before the range is bounded to "high".
KVStoreView<Task> highPriorityProj1 = store.view(Task.class)
.index("priority")
.parent("proj1") // Filter by parent project ID
.first("high")
.last("high");
// Query all tasks for project 1 by status (no first/last bounds are set)
KVStoreView<Task> proj1ByStatus = store.view(Task.class)
.index("status")
.parent("proj1");
// Query open tasks for project 2
KVStoreView<Task> openProj2 = store.view(Task.class)
.index("status")
.parent("proj2")
.first("open")
.last("open");
Control whether index entries store full object data or just references.
Usage Examples:
/**
 * Example entity contrasting reference indices with copy indices.
 */
public class Product {

  /** Natural index of the product. */
  @KVIndex
  public String productId;

  // Reference index (default)
  @KVIndex(value = "category", copy = false)
  public String category;

  // Copy index - stores full object data
  // NOTE(review): the KVIndex documentation restricts indices to String, integral types,
  // boolean, and arrays of those; double is not in that list - confirm the target store
  // accepts it before relying on this index.
  @KVIndex(value = "price", copy = true)
  public double price;

  // Copy index for fast access
  @KVIndex(value = "inStock", copy = true)
  public boolean inStock;

  /** Not indexed. */
  public String name;

  /** Not indexed; the constructor leaves it unset. */
  public String description;

  /**
   * Creates a product; {@code description} is not assigned here.
   *
   * @param id value of the natural index
   * @param category value of the "category" reference index
   * @param price value of the "price" copy index
   * @param inStock value of the "inStock" copy index
   * @param name product name (not indexed)
   */
  public Product(String id, String category, double price, boolean inStock, String name) {
    this.productId = id;
    this.category = category;
    this.price = price;
    this.inStock = inStock;
    this.name = name;
  }
}
// Copy indices provide faster iteration since no additional lookups are needed
// Reference indices save disk space but require additional lookups during iteration
// (relies on a KVStore named "store" that is already open and populated)
// Fast iteration over products by price (copy index), starting at 10.0 and stopping at 50.0
for (Product p : store.view(Product.class).index("price").first(10.0).last(50.0)) {
// No additional lookup needed - object data stored in index
System.out.println(p.name + ": $" + p.price);
}
// Slower iteration over products by category (reference index), starting at "electronics".
// NOTE(review): no last(...) bound is set, so iteration presumably continues through every
// later category as well; add .last("electronics") to restrict it to that category - confirm.
for (Product p : store.view(Product.class).index("category").first("electronics")) {
// Additional lookup needed to get full object data
System.out.println(p.name + " in " + p.category);
}
Index array fields for multi-value queries.
Usage Examples:
/**
 * Example entity with array-valued indices.
 */
public class Document {

  /** Natural index of the document. */
  @KVIndex
  public String documentId;

  /** Array-valued index over the document's tags. */
  @KVIndex("tags")
  public String[] tags;

  /** Array-valued index over the document's scores. */
  @KVIndex("scores")
  public int[] scores;

  /** Not indexed. */
  public String title;

  /**
   * Creates a document with all fields populated.
   *
   * @param id value of the natural index
   * @param title document title (not indexed)
   * @param tags value of the "tags" index
   * @param scores value of the "scores" index
   */
  public Document(String id, String title, String[] tags, int[] scores) {
    this.documentId = id;
    this.title = title;
    this.tags = tags;
    this.scores = scores;
  }
}
// Usage with array indices (relies on a KVStore named "store" that is already open)
Document doc1 = new Document("doc1", "Java Guide",
new String[]{"java", "programming", "tutorial"},
new int[]{95, 87, 92});
Document doc2 = new Document("doc2", "Python Basics",
new String[]{"python", "programming", "beginner"},
new int[]{88, 91, 85});
store.write(doc1);
store.write(doc2);
// Note: Array indexing creates entries for each array element
// NOTE(review): the claim above is unverified. The store may instead treat the whole array as
// a single composite index value, in which case the single-tag lookup below would not match
// doc1 or doc2 - confirm against the store implementation before relying on it.
// Query documents with "programming" tag
for (Document d : store.view(Document.class).index("tags").first("programming").last("programming")) {
System.out.println("Found: " + d.title);
}
Access index metadata and configuration for stored types.
/**
* Wrapper around types managed in a KVStore, providing easy access to their indexed fields.
*
* Only the public signatures are listed here; method bodies are omitted.
*/
public class KVTypeInfo {
// Creates the wrapper for the given type.
public KVTypeInfo(Class<?> type);
// Returns the type this wrapper was created for.
public Class<?> type();
// Returns the value that the named index has for the given instance. Declared to throw
// Exception, so callers must handle or propagate it.
public Object getIndexValue(String indexName, Object instance) throws Exception;
// Returns the KVIndex annotations of the wrapped type as a stream.
public Stream<KVIndex> indices();
}
Usage Examples:
// Introspect a class to see its indices
KVTypeInfo typeInfo = new KVTypeInfo(Person.class);
System.out.println("Type: " + typeInfo.type().getSimpleName());
System.out.println("Indices:");
// One line per index: its name, its parent (only when one is set), and whether it is a
// copy or a reference index
typeInfo.indices().forEach(index -> {
System.out.println(" - " + index.value() +
(index.parent().isEmpty() ? "" : " (parent: " + index.parent() + ")") +
(index.copy() ? " [copy]" : " [reference]"));
});
// Get index value from an instance. getIndexValue returns Object and is declared to throw
// Exception, so the result is cast and the enclosing code must handle or propagate it.
Person person = new Person("p1", "Alice", 30, true);
String nameValue = (String) typeInfo.getIndexValue("name", person);
Integer ageValue = (Integer) typeInfo.getIndexValue("age", person);
Internal interface for accessing field and method values used by the type introspection system.
/**
* Abstracts the difference between invoking a Field and a Method, so callers can read a value
* and ask for its type without knowing which of the two backs it.
*/
interface Accessor {
/**
* Get the value from the specified instance.
* @param instance - The object instance to access
* @return The field or method return value
* @throws ReflectiveOperationException If access fails
*/
Object get(Object instance) throws ReflectiveOperationException;
/**
* Get the type of the field or method return value.
* @return The field type or method return type
*/
Class<?> getType();
}
Install with Tessl CLI
npx tessl i tessl/maven-org-apache-spark--spark-kvstore