or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

core-operations.md, index.md, indexing-system.md, querying-views.md, storage-backends.md
tile.json

docs/indexing-system.md

Indexing System

The KVStore indexing system provides efficient data access and sorting through field and method annotations. Indices enable fast queries without loading all data and support hierarchical relationships between entities.

Capabilities

KVIndex Annotation

Tags fields or methods to be indexed when storing objects, enabling efficient sorting and querying.

/**
 * Tags a field or method to be indexed when storing an object.
 * Supports String, integral types (byte, short, int, long, boolean), and arrays.
 * The annotation is retained at runtime and may be placed on fields or methods.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.METHOD})
public @interface KVIndex {
    
    /**
     * The natural index name constant; this is also the default value of {@link #value()}.
     */
    String NATURAL_INDEX_NAME = "__main__";
    
    /**
     * The name of the index to be created for the annotated entity.
     * Must be unique within the class. User-defined names cannot start with an
     * underscore (that prefix is reserved).
     * @return The index name (default: natural index name)
     */
    String value() default NATURAL_INDEX_NAME;
    
    /**
     * The name of the parent index of this index.
     * Creates hierarchical relationships between entities; querying a child index
     * requires supplying a value for its parent index.
     * @return The parent index name (default: empty string, meaning no parent)
     */
    String parent() default "";
    
    /**
     * Whether to copy the instance's data to the index instead of storing a pointer to it.
     * Pointer mode (the default) saves disk space but is slower to read due to indirection;
     * copy mode reads faster at the cost of additional space.
     * @return true to copy data, false to store pointer (default: false)
     */
    boolean copy() default false;
}

Type Information Management

KVTypeInfo provides introspection and access to indexed fields for stored objects.

/**
 * Wrapper around types managed in KVStore, providing access to indexed fields.
 * This class introspects types to find @KVIndex annotations on fields and methods.
 *
 * NOTE: this listing shows the public API signatures only; method bodies are omitted.
 */
public class KVTypeInfo {
    
    /**
     * Create type information by introspecting the given type for indices.
     * @param type The class to introspect for @KVIndex-annotated fields and methods
     */
    public KVTypeInfo(Class<?> type);
    
    /**
     * Returns the managed type.
     * @return The class type being managed (the type passed to the constructor)
     */
    public Class<?> type();
    
    /**
     * Gets the index value from an instance using reflection.
     * @param indexName The name of the index, as declared by a @KVIndex annotation
     * @param instance The object instance to extract value from
     * @return The index value for the given instance
     * @throws Exception if reflection fails
     */
    public Object getIndexValue(String indexName, Object instance) throws Exception;
    
    /**
     * Returns a stream of all indices for the type.
     * @return Stream of KVIndex annotations found on the managed type
     */
    public Stream<KVIndex> indices();
}

Usage Examples:

Basic Indexing

import org.apache.spark.util.kvstore.KVIndex;

/**
 * Sample entity with a natural index and several secondary indices.
 *
 * Every indexed value is a String, a long, or a boolean, matching the types
 * KVIndex supports. The hire date is exposed to the index as an epoch-day
 * {@code long} because {@code java.time.LocalDate} is not an indexable type.
 */
public class Employee {
    // Natural index - unique identifier (required)
    @KVIndex
    public String employeeId;
    
    // Secondary indices for efficient querying
    @KVIndex("department")
    public String department;
    
    @KVIndex("salary")
    public long salary;
    
    // Not annotated directly: indexed through hireDateEpochDay() below
    public java.time.LocalDate hireDate;
    
    @KVIndex("active")
    public boolean isActive;
    
    /**
     * Creates an employee record.
     *
     * @param id       unique employee identifier (natural index value)
     * @param dept     department name
     * @param salary   salary amount
     * @param hireDate hire date; must be non-null for the "hireDate" index to be computed
     * @param active   whether the employee is currently active
     */
    public Employee(String id, String dept, long salary, java.time.LocalDate hireDate, boolean active) {
        this.employeeId = id;
        this.department = dept;
        this.salary = salary;
        this.hireDate = hireDate;
        this.isActive = active;
    }
    
    /**
     * Index value for the "hireDate" index: days since 1970-01-01.
     * A long keeps the index within the supported integral types while
     * preserving chronological ordering.
     *
     * @return the hire date as an epoch-day count
     */
    @KVIndex("hireDate")
    public long hireDateEpochDay() {
        return hireDate.toEpochDay();
    }
}

// Open the LevelDB-backed store used by the examples
KVStore store = new LevelDB(new File("/path/to/store"));

// Persist the sample employees one by one
Employee[] staff = {
    new Employee("E001", "Engineering", 75000, LocalDate.of(2020, 1, 15), true),
    new Employee("E002", "Marketing", 65000, LocalDate.of(2019, 6, 10), true),
    new Employee("E003", "Engineering", 80000, LocalDate.of(2021, 3, 20), false)
};
for (Employee member : staff) {
    store.write(member);
}

// Department lookup: identical lower and upper bound selects a single value
String dept = "Engineering";
KVStoreView<Employee> engineers = store.view(Employee.class)
    .index("department")
    .first(dept)
    .last(dept);

// Salary range lookup (70k-85k, both bounds given as long to match the field type)
long minSalary = 70000L;
long maxSalary = 85000L;
KVStoreView<Employee> highEarners = store.view(Employee.class)
    .index("salary")
    .first(minSalary)
    .last(maxSalary);

// Active-flag lookup: only entries whose "active" index value is true
boolean activeFlag = true;
KVStoreView<Employee> activeEmployees = store.view(Employee.class)
    .index("active")
    .first(activeFlag)
    .last(activeFlag);

Method-Based Indexing

/**
 * Example entity whose secondary indices are computed by methods
 * instead of being read directly from fields.
 */
public class Task {
    // Natural index (unique key of the task)
    @KVIndex
    private String taskId;

    private java.time.LocalDateTime createdAt;
    private java.time.LocalDateTime completedAt;

    /** Index value for "dayOfYear": the day of the year on which the task was created. */
    @KVIndex("dayOfYear")
    public int getCreationDayOfYear() {
        final int creationDay = createdAt.getDayOfYear();
        return creationDay;
    }

    /**
     * Index value for "duration": minutes between creation and completion,
     * or -1 while the task has no completion time.
     */
    @KVIndex("duration")
    public long getDurationMinutes() {
        if (completedAt != null) {
            java.time.Duration elapsed = java.time.Duration.between(createdAt, completedAt);
            return elapsed.toMinutes();
        }
        return -1;
    }

    /** Accessor for the natural key. */
    public String getTaskId() {
        return taskId;
    }

    // Constructor and other methods...
}

// Query tasks by creation day.
// The day-of-year is read from the clock once so that both bounds are guaranteed
// to be identical, even if midnight passes while the query is being built.
// Note: the index holds only the day-of-year, so tasks created on the same
// calendar day in other years match as well.
int today = java.time.LocalDateTime.now().getDayOfYear();
KVStoreView<Task> todaysTasks = store.view(Task.class)
    .index("dayOfYear")
    .first(today)
    .last(today);

Hierarchical Indices (Parent-Child Relationships)

/**
 * Task entity demonstrating hierarchical indices: "priority" and "status"
 * are child indices declared with "project" as their parent index.
 */
public class ProjectTask {
    // Natural index: unique task identifier
    @KVIndex
    public String taskId;

    // Parent index: groups tasks by the project they belong to
    @KVIndex("project")
    public String projectId;

    // Child indices, each declared under the "project" parent index
    @KVIndex(parent = "project", value = "priority")
    public int priority;

    @KVIndex(parent = "project", value = "status")
    public String status;

    /** Populates every field from the supplied values. */
    public ProjectTask(String taskId, String projectId, int priority, String status) {
        this.status = status;
        this.priority = priority;
        this.projectId = projectId;
        this.taskId = taskId;
    }
}

// Query tasks within a specific project
// (identical first/last bounds pin the index to a single value)
KVStoreView<ProjectTask> projectTasks = store.view(ProjectTask.class)
    .index("project")
    .first("PROJECT-123")
    .last("PROJECT-123");

// Query high-priority tasks (priority >= 8, assuming larger numbers mean higher
// priority) within a project, highest priority first.
// With reverse() the iteration is descending, so the threshold must be the value
// at which iteration STOPS (last). Using first(8) here would instead start at 8
// and walk downward, returning the tasks with priority <= 8.
KVStoreView<ProjectTask> highPriorityTasks = store.view(ProjectTask.class)
    .index("priority")
    .parent("PROJECT-123")  // Required for child index
    .reverse()
    .last(8);

// Query running tasks within a project
KVStoreView<ProjectTask> runningTasks = store.view(ProjectTask.class)
    .index("status")
    .parent("PROJECT-123")  // Required for child index
    .first("running")
    .last("running");

Copy vs Pointer Indices

/**
 * Example entity contrasting pointer indices (the default) with copy indices.
 */
public class Document {
    // Natural index: unique document identifier
    @KVIndex
    public String documentId;

    // Default (pointer) index: less space, slower reads
    @KVIndex("category")
    public String category;

    // Copy index: more space, faster reads
    @KVIndex(copy = true, value = "priority")
    public int priority;

    // Pointer index again, chosen to save space for this large object
    @KVIndex("contentHash")
    public String contentHash;

    // Large payload; deliberately left without an index
    public byte[] content;

    /** Populates every field from the supplied values. */
    public Document(String id, String category, int priority, String hash, byte[] content) {
        this.content = content;
        this.contentHash = hash;
        this.priority = priority;
        this.category = category;
        this.documentId = id;
    }
}

Array Indexing

/**
 * Example entity with array-typed indices.
 */
public class Product {
    // Natural index - unique product identifier
    @KVIndex
    public String productId;
    
    // Array index over the product's tags.
    // NOTE(review): this was described as "each tag creates a separate index entry",
    // but an array value appears to be indexed as ONE composite key (elements in
    // order), not one entry per element - confirm before relying on per-tag lookups.
    @KVIndex("tags")
    public String[] tags;
    
    // Array index over category ids (same caveat as for tags - TODO confirm)
    @KVIndex("categories")
    public int[] categoryIds;
    
    // Stores the given id and array references as-is (no defensive copies are made)
    public Product(String id, String[] tags, int[] categories) {
        this.productId = id;
        this.tags = tags;
        this.categoryIds = categories;
    }
}

// Query products by tag: both bounds are pinned to the same tag value.
// NOTE(review): presumably this only matches if array indices are searchable per
// element; verify against the store implementation, since an array may be indexed
// as a single composite key.
String tag = "electronics";
KVStoreView<Product> electronicsProducts = store.view(Product.class)
    .index("tags")
    .first(tag)
    .last(tag);

Supported Index Types

Scalar Types:

  • String - Text indexing with lexicographic ordering
  • byte, short, int, long - Numeric indexing with natural ordering
  • boolean - Boolean indexing (false < true)

Array Types:

  • String[] - The array as a whole forms a single composite index key (elements compared in order)
  • byte[], short[], int[], long[] - Numeric array indexing
  • boolean[] - Boolean array indexing

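Temporal Types (not indexable directly — KVIndex supports only String, integral types, and arrays; index a converted integral value instead):

  • java.time.LocalDate - Date indexing, e.g. via toEpochDay() as a long
  • java.time.LocalDateTime - Date-time indexing, e.g. via an epoch-based long
  • java.util.Date - Legacy date indexing, e.g. via getTime() as a long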

Performance Considerations

// Index selection affects query performance
/**
 * Example entity illustrating index choices: which fields to index, which index
 * to store as a copy, and which fields to leave unindexed.
 */
public class OptimizedEntity {
    // Natural index - unique identifier
    @KVIndex
    public String id;
    
    // High-cardinality index - good for exact matches
    @KVIndex("userId")
    public String userId;
    
    // Low-cardinality index - good for filtering
    @KVIndex("status")
    public String status;  // "active", "inactive", "pending"
    
    // Range queries benefit from ordered indices
    // (queried below with a System.currentTimeMillis()-based bound, i.e. epoch millis)
    @KVIndex("timestamp")
    public long timestamp;
    
    // Copy index for frequently accessed small data
    // (copy = true stores the data in the index itself rather than a pointer)
    @KVIndex(value = "priority", copy = true)
    public int priority;
    
    // Don't index large, rarely queried fields
    public String description;  // No @KVIndex annotation
    public byte[] largeData;    // No @KVIndex annotation
}

// Efficient query patterns

// 1. Exact match on high-cardinality index: both bounds use the same key
String userKey = "user-12345";
KVStoreView<OptimizedEntity> userEntities = store.view(OptimizedEntity.class)
    .index("userId")
    .first(userKey)
    .last(userKey);

// 2. Range query on ordered index: lower bound only, starting 24 hours ago
long since = System.currentTimeMillis() - 86400000;
KVStoreView<OptimizedEntity> recentEntities = store.view(OptimizedEntity.class)
    .index("timestamp")
    .first(since);

// 3. Filter by low-cardinality index: select a single status value
String wantedStatus = "active";
KVStoreView<OptimizedEntity> activeEntities = store.view(OptimizedEntity.class)
    .index("status")
    .first(wantedStatus)
    .last(wantedStatus);