or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-operations.md · index.md · indexing-system.md · querying-views.md · storage-backends.md

docs/indexing-system.md

0

# Indexing System

1

2

The KVStore indexing system provides efficient data access and sorting through field and method annotations. Indices enable fast queries without loading all data and support hierarchical relationships between entities.

3

4

## Capabilities

5

6

### KVIndex Annotation

7

8

Tags fields or methods to be indexed when storing objects, enabling efficient sorting and querying.

9

10

```java { .api }

11

/**

12

* Tags a field or method to be indexed when storing an object.

13

* Supports String, integral types (byte, short, int, long), boolean, and arrays of those types.

14

*/

15

@Retention(RetentionPolicy.RUNTIME)

16

@Target({ElementType.FIELD, ElementType.METHOD})

17

public @interface KVIndex {

18

19

/**

20

* The natural index name constant.

21

*/

22

String NATURAL_INDEX_NAME = "__main__";

23

24

/**

25

* The name of the index to be created for the annotated entity.

26

* Must be unique within the class. Cannot start with underscore (reserved).

27

* @return The index name (default: natural index name)

28

*/

29

String value() default NATURAL_INDEX_NAME;

30

31

/**

32

* The name of the parent index of this index.

33

* Creates hierarchical relationships between entities.

34

* @return The parent index name (default: no parent)

35

*/

36

String parent() default "";

37

38

/**

39

* Whether to copy the instance's data to the index.

40

* Copying uses more disk space but is faster to read; storing only a pointer (the default) saves space but is slower to read due to indirection.

41

* @return true to copy data, false to store pointer (default: false)

42

*/

43

boolean copy() default false;

44

}

45

```

46

47

### Type Information Management

48

49

KVTypeInfo provides introspection and access to indexed fields for stored objects.

50

51

```java { .api }

52

/**

53

* Wrapper around types managed in KVStore, providing access to indexed fields.

54

* This class introspects types to find @KVIndex annotations on fields and methods.

55

*/

56

public class KVTypeInfo {

57

58

/**

59

* Create type information by introspecting the given type for indices.

60

* @param type The class to introspect

61

*/

62

public KVTypeInfo(Class<?> type);

63

64

/**

65

* Returns the managed type.

66

* @return The class type being managed

67

*/

68

public Class<?> type();

69

70

/**

71

* Gets the index value from an instance using reflection.

72

* @param indexName The name of the index

73

* @param instance The object instance to extract value from

74

* @return The index value for the given instance

75

* @throws Exception if reflection fails

76

*/

77

public Object getIndexValue(String indexName, Object instance) throws Exception;

78

79

/**

80

* Returns a stream of all indices for the type.

81

* @return Stream of KVIndex annotations

82

*/

83

public Stream<KVIndex> indices();

84

}

85

86

```

87

88

**Usage Examples:**

89

90

### Basic Indexing

91

92

```java

93

import org.apache.spark.util.kvstore.KVIndex;

94

95

public class Employee {

96

// Natural index - unique identifier (required)

97

@KVIndex

98

public String employeeId;

99

100

// Secondary indices for efficient querying

101

@KVIndex("department")

102

public String department;

103

104

@KVIndex("salary")

105

public long salary;

106

107

@KVIndex("hireDate")

108

public java.time.LocalDate hireDate;

109

110

@KVIndex("active")

111

public boolean isActive;

112

113

// Constructor

114

public Employee(String id, String dept, long salary, LocalDate hireDate, boolean active) {

115

this.employeeId = id;

116

this.department = dept;

117

this.salary = salary;

118

this.hireDate = hireDate;

119

this.isActive = active;

120

}

121

}

122

123

// Store and query employees

124

KVStore store = new LevelDB(new File("/path/to/store"));

125

126

// Store employees

127

store.write(new Employee("E001", "Engineering", 75000, LocalDate.of(2020, 1, 15), true));

128

store.write(new Employee("E002", "Marketing", 65000, LocalDate.of(2019, 6, 10), true));

129

store.write(new Employee("E003", "Engineering", 80000, LocalDate.of(2021, 3, 20), false));

130

131

// Query by department

132

KVStoreView<Employee> engineers = store.view(Employee.class)

133

.index("department")

134

.first("Engineering")

135

.last("Engineering");

136

137

// Query by salary range (70k-85k)

138

KVStoreView<Employee> highEarners = store.view(Employee.class)

139

.index("salary")

140

.first(70000L)

141

.last(85000L);

142

143

// Query active employees

144

KVStoreView<Employee> activeEmployees = store.view(Employee.class)

145

.index("active")

146

.first(true)

147

.last(true);

148

```

149

150

### Method-Based Indexing

151

152

```java

153

public class Task {

154

@KVIndex

155

private String taskId;

156

157

private java.time.LocalDateTime createdAt;

158

private java.time.LocalDateTime completedAt;

159

160

// Index based on method result

161

@KVIndex("dayOfYear")

162

public int getCreationDayOfYear() {

163

return createdAt.getDayOfYear();

164

}

165

166

@KVIndex("duration")

167

public long getDurationMinutes() {

168

if (completedAt == null) return -1;

169

return java.time.Duration.between(createdAt, completedAt).toMinutes();

170

}

171

172

// Getter for natural key

173

public String getTaskId() { return taskId; }

174

175

// Constructor and other methods...

176

}

177

178

// Query tasks by creation day

179

KVStoreView<Task> todaysTasks = store.view(Task.class)

180

.index("dayOfYear")

181

.first(LocalDateTime.now().getDayOfYear())

182

.last(LocalDateTime.now().getDayOfYear());

183

```

184

185

### Hierarchical Indices (Parent-Child Relationships)

186

187

```java

188

public class ProjectTask {

189

@KVIndex

190

public String taskId;

191

192

// Parent index for project grouping

193

@KVIndex("project")

194

public String projectId;

195

196

// Child index that depends on project

197

@KVIndex(value = "priority", parent = "project")

198

public int priority;

199

200

@KVIndex(value = "status", parent = "project")

201

public String status;

202

203

public ProjectTask(String taskId, String projectId, int priority, String status) {

204

this.taskId = taskId;

205

this.projectId = projectId;

206

this.priority = priority;

207

this.status = status;

208

}

209

}

210

211

// Query tasks within a specific project

212

KVStoreView<ProjectTask> projectTasks = store.view(ProjectTask.class)

213

.index("project")

214

.first("PROJECT-123")

215

.last("PROJECT-123");

216

217

// Query high-priority tasks within a project

218

KVStoreView<ProjectTask> highPriorityTasks = store.view(ProjectTask.class)

219

.index("priority")

220

.parent("PROJECT-123") // Required for child index

221

.first(8)

222

.reverse();

223

224

// Query running tasks within a project

225

KVStoreView<ProjectTask> runningTasks = store.view(ProjectTask.class)

226

.index("status")

227

.parent("PROJECT-123")

228

.first("running")

229

.last("running");

230

```

231

232

### Copy vs Pointer Indices

233

234

```java

235

public class Document {

236

@KVIndex

237

public String documentId;

238

239

// Pointer index (default) - saves space, slower reads

240

@KVIndex("category")

241

public String category;

242

243

// Copy index - faster reads, uses more space

244

@KVIndex(value = "priority", copy = true)

245

public int priority;

246

247

// Large object - use pointer to save space

248

@KVIndex("contentHash")

249

public String contentHash;

250

251

public byte[] content; // Large data not indexed

252

253

public Document(String id, String category, int priority, String hash, byte[] content) {

254

this.documentId = id;

255

this.category = category;

256

this.priority = priority;

257

this.contentHash = hash;

258

this.content = content;

259

}

260

}

261

```

262

263

### Array Indexing

264

265

```java

266

public class Product {

267

@KVIndex

268

public String productId;

269

270

// Array index - each tag creates a separate index entry

271

@KVIndex("tags")

272

public String[] tags;

273

274

@KVIndex("categories")

275

public int[] categoryIds;

276

277

public Product(String id, String[] tags, int[] categories) {

278

this.productId = id;

279

this.tags = tags;

280

this.categoryIds = categories;

281

}

282

}

283

284

// Query products by tag

285

KVStoreView<Product> electronicsProducts = store.view(Product.class)

286

.index("tags")

287

.first("electronics")

288

.last("electronics");

289

```

290

291

### Supported Index Types

292

293

**Scalar Types:**

294

- `String` - Text indexing with lexicographic ordering

295

- `byte`, `short`, `int`, `long` - Numeric indexing with natural ordering

296

- `boolean` - Boolean indexing (false < true)

297

298

**Array Types:**

299

- `String[]` - Each array element creates separate index entries

300

- `byte[]`, `short[]`, `int[]`, `long[]` - Numeric array indexing

301

- `boolean[]` - Boolean array indexing

302

303

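**Temporal Types** (not among the types listed in the `KVIndex` Javadoc above; verify that your storage backend accepts them as index keys, or index an equivalent `long` value such as epoch milliseconds instead):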

304

- `java.time.LocalDate` - Date indexing

305

- `java.time.LocalDateTime` - Date-time indexing

306

- `java.util.Date` - Legacy date indexing

307

308

### Performance Considerations

309

310

```java

311

// Index selection affects query performance

312

public class OptimizedEntity {

313

@KVIndex

314

public String id;

315

316

// High-cardinality index - good for exact matches

317

@KVIndex("userId")

318

public String userId;

319

320

// Low-cardinality index - good for filtering

321

@KVIndex("status")

322

public String status; // "active", "inactive", "pending"

323

324

// Range queries benefit from ordered indices

325

@KVIndex("timestamp")

326

public long timestamp;

327

328

// Copy index for frequently accessed small data

329

@KVIndex(value = "priority", copy = true)

330

public int priority;

331

332

// Don't index large, rarely queried fields

333

public String description; // No @KVIndex annotation

334

public byte[] largeData; // No @KVIndex annotation

335

}

336

337

// Efficient query patterns

338

// 1. Exact match on high-cardinality index

339

KVStoreView<OptimizedEntity> userEntities = store.view(OptimizedEntity.class)

340

.index("userId")

341

.first("user-12345")

342

.last("user-12345");

343

344

// 2. Range query on ordered index

345

KVStoreView<OptimizedEntity> recentEntities = store.view(OptimizedEntity.class)

346

.index("timestamp")

347

.first(System.currentTimeMillis() - 86400000); // Last 24 hours

348

349

// 3. Filter by low-cardinality index

350

KVStoreView<OptimizedEntity> activeEntities = store.view(OptimizedEntity.class)

351

.index("status")

352

.first("active")

353

.last("active");

354

```