0
# Metadata Management
1
2
The metadata management system covers properties, tags, search, and indexing, with support for custom indexing strategies and historical snapshots. `MetadataDataset` provides metadata operations for any entity in the CDAP system, including search and retrieval.
3
4
## Capabilities
5
6
### Core Metadata Operations
7
8
`MetadataDataset` is the primary class for metadata management. It provides property and tag management (set, get, remove), complete metadata retrieval, search, historical snapshots, and index maintenance.
9
10
```java { .api }
11
public class MetadataDataset extends AbstractDataset {
12
// Property Management
13
public MetadataChange setProperty(MetadataEntity metadataEntity, String key, String value);
14
public MetadataChange setProperty(MetadataEntity metadataEntity, Map<String, String> properties);
15
public Map<String, String> getProperties(MetadataEntity metadataEntity);
16
public String getProperty(MetadataEntity metadataEntity, String key);
17
public MetadataChange removeProperties(MetadataEntity metadataEntity, Set<String> keys);
18
public MetadataChange removeProperties(MetadataEntity metadataEntity);
19
20
// Tag Management
21
public MetadataChange addTags(MetadataEntity metadataEntity, Set<String> tagsToAdd);
22
public Set<String> getTags(MetadataEntity metadataEntity);
23
public MetadataChange removeTags(MetadataEntity metadataEntity, Set<String> tagsToRemove);
24
public MetadataChange removeTags(MetadataEntity metadataEntity);
25
26
// Complete Metadata Retrieval
27
public Metadata getMetadata(MetadataEntity metadataEntity);
28
public Set<Metadata> getMetadata(Set<MetadataEntity> metadataEntities);
29
30
// Search Operations
31
public SearchResults search(SearchRequest request) throws BadRequestException;
32
33
// Historical Operations
34
public Set<Metadata> getSnapshotBeforeTime(Set<MetadataEntity> metadataEntities, long timeMillis);
35
36
// Index Management
37
public MetadataChange rebuildIndexes(byte[] startRowKey, int limit);
38
public void deleteAllIndexes(int limit);
39
}
40
```
41
42
### Metadata Store Interface
43
44
`MetadataStore` is a higher-level interface for scope-based metadata management. It covers both system and user metadata and provides create, read, update, and delete operations for properties, tags, and complete metadata records.
45
46
```java { .api }
47
public interface MetadataStore {
48
// Property Management
49
void setProperties(MetadataScope scope, MetadataEntity metadataEntity, Map<String, String> properties);
50
void setProperty(MetadataScope scope, MetadataEntity metadataEntity, String key, String value);
51
Map<String, String> getProperties(MetadataEntity metadataEntity);
52
Map<String, String> getProperties(MetadataScope scope, MetadataEntity metadataEntity);
53
void removeProperties(MetadataScope scope, MetadataEntity metadataEntity);
54
void removeProperties(MetadataScope scope, MetadataEntity metadataEntity, Set<String> keys);
55
56
// Tag Management
57
void addTags(MetadataScope scope, MetadataEntity metadataEntity, Set<String> tagsToAdd);
58
Set<String> getTags(MetadataEntity metadataEntity);
59
Set<String> getTags(MetadataScope scope, MetadataEntity metadataEntity);
60
void removeTags(MetadataScope scope, MetadataEntity metadataEntity);
61
void removeTags(MetadataScope scope, MetadataEntity metadataEntity, Set<String> tagsToRemove);
62
63
// Complete Metadata Operations
64
Set<MetadataRecordV2> getMetadata(MetadataEntity metadataEntity);
65
MetadataRecordV2 getMetadata(MetadataScope scope, MetadataEntity metadataEntity);
66
Set<MetadataRecordV2> getMetadata(MetadataScope scope, Set<MetadataEntity> metadataEntities);
67
void removeMetadata(MetadataEntity metadataEntity);
68
void removeMetadata(MetadataScope scope, MetadataEntity metadataEntity);
69
70
// Search Operations
71
MetadataSearchResponseV2 search(SearchRequest request);
72
73
// Historical Operations
74
Set<MetadataRecordV2> getSnapshotBeforeTime(MetadataScope scope, Set<MetadataEntity> metadataEntities, long timeMillis);
75
76
// Administrative Operations
77
void rebuildIndexes(MetadataScope scope, RetryStrategy retryStrategy);
78
void createOrUpgrade(MetadataScope scope) throws DatasetManagementException, IOException;
79
}
80
```
81
82
### Metadata Indexing
83
84
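Pluggable indexing strategies for efficient metadata search and retrieval across different data access patterns. Each strategy implements `Indexer`, whose single method returns the set of index strings for a given `MetadataEntry`.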
85
86
```java { .api }
87
// Base indexing interface
88
public interface Indexer {
89
Set<String> getIndexes(MetadataEntry entry);
90
}
91
92
// Standard indexing implementations
93
public class DefaultValueIndexer implements Indexer {
94
// Standard value-based indexing for exact matches
95
}
96
97
public class SchemaIndexer implements Indexer {
98
// Schema-aware indexing for structured metadata
99
}
100
101
public class InvertedValueIndexer implements Indexer {
102
// Reverse indexing for efficient range queries and sorting
103
}
104
105
public class InvertedTimeIndexer implements Indexer {
106
// Time-based reverse indexing for temporal queries
107
}
108
109
public class ValueOnlyIndexer implements Indexer {
110
// Value-only indexing without key information
111
}
112
113
public class MetadataEntityTypeIndexer implements Indexer {
114
// Entity type-based indexing for type-specific queries
115
}
116
```
117
118
### Search and Query Operations
119
120
Comprehensive search capabilities with flexible query parameters, sorting, and pagination support.
121
122
```java { .api }
123
// Search request configuration
124
public class SearchRequest {
125
public static Builder builder();
126
public String getQuery();
127
public Set<MetadataScope> getScopes();
128
public Set<EntityTypeSimpleName> getTypes();
129
public SortInfo getSortInfo();
130
public int getOffset();
131
public int getLimit();
132
public boolean shouldShowHidden();
133
public Set<String> getCursorRequiredFields();
134
135
public static class Builder {
136
public Builder setQuery(String query);
137
public Builder setScopes(Set<MetadataScope> scopes);
138
public Builder setTypes(Set<EntityTypeSimpleName> types);
139
public Builder setSortInfo(SortInfo sortInfo);
140
public Builder setOffset(int offset);
141
public Builder setLimit(int limit);
142
public Builder setShowHidden(boolean showHidden);
143
public Builder setCursorRequiredFields(Set<String> fields);
144
public SearchRequest build();
145
}
146
}
147
148
// Search results with pagination
149
public class SearchResults {
150
public List<MetadataSearchResultRecord> getResults();
151
public String getCursor();
152
public int getTotal();
153
public boolean hasMore();
154
}
155
156
// Search result record structure
157
public class MetadataSearchResultRecord {
158
public MetadataEntity getMetadataEntity();
159
public Metadata getMetadata();
160
}
161
162
// Sorting configuration
163
public class SortInfo {
164
public String getSortBy();
165
public SortOrder getSortOrder();
166
167
public enum SortOrder {
168
ASC, DESC
169
}
170
}
171
```
172
173
## Usage Examples
174
175
### Basic Property and Tag Management
176
177
```java
178
// Access metadata dataset (typically injected)
179
MetadataDataset metadataDataset = // ... obtain instance
180
181
// Define entity to operate on
182
MetadataEntity entity = MetadataEntity.ofDataset(NamespaceId.DEFAULT, "userProfiles");
183
184
// Set properties
185
Map<String, String> properties = Map.of(
186
"environment", "production",
187
"owner", "team-alpha",
188
"created", "2023-01-15",
189
"format", "parquet",
190
"compression", "snappy"
191
);
192
193
MetadataChange change = metadataDataset.setProperty(entity, properties);
194
System.out.println("Properties added: " + change.getAfter().getProperties());
195
196
// Add tags
197
Set<String> tags = Set.of("production", "critical", "team-alpha", "analytics");
198
metadataDataset.addTags(entity, tags);
199
200
// Retrieve complete metadata
201
Metadata metadata = metadataDataset.getMetadata(entity);
202
System.out.println("Properties: " + metadata.getProperties());
203
System.out.println("Tags: " + metadata.getTags());
204
205
// Update specific property
206
metadataDataset.setProperty(entity, "last_updated", "2023-06-20");
207
208
// Remove specific tags
209
metadataDataset.removeTags(entity, Set.of("analytics"));
210
```
211
212
### Advanced Search Operations
213
214
```java
215
// Search by property value
216
SearchRequest request = SearchRequest.builder()
217
.setQuery("properties:environment:production")
218
.setTypes(Set.of(EntityTypeSimpleName.DATASET))
219
.setLimit(50)
220
.build();
221
222
SearchResults results = metadataDataset.search(request);
223
for (MetadataSearchResultRecord record : results.getResults()) {
224
System.out.println("Dataset: " + record.getMetadataEntity());
225
System.out.println("Environment: " + record.getMetadata().getProperties().get("environment"));
226
}
227
228
// Search by tags with sorting
229
SearchRequest tagSearch = SearchRequest.builder()
230
.setQuery("tags:critical")
231
.setSortInfo(new SortInfo("entity_name", SortInfo.SortOrder.ASC))
232
.setOffset(0)
233
.setLimit(100)
234
.build();
235
236
SearchResults tagResults = metadataDataset.search(tagSearch);
237
238
// Complex query combining properties and tags
239
SearchRequest complexSearch = SearchRequest.builder()
240
.setQuery("(properties:owner:team-alpha) AND (tags:production)")
241
.setTypes(Set.of(EntityTypeSimpleName.DATASET, EntityTypeSimpleName.APPLICATION))
242
.build();
243
244
SearchResults complexResults = metadataDataset.search(complexSearch);
245
246
// Paginated search: advance the offset until hasMore() reports no further results
247
int offset = 0;
248
SearchResults page;
249
do {
250
SearchRequest paginatedRequest = SearchRequest.builder()
251
.setQuery("tags:analytics")
252
.setOffset(offset)
253
.setLimit(20)
254
.build();
255
page = metadataDataset.search(paginatedRequest);
256
processResults(page.getResults()); // processResults is an application-defined handler
257
offset += page.getResults().size();
258
} while (page.hasMore());
259
```
260
261
### Historical Metadata Operations
262
263
```java
264
// Get historical snapshot of metadata
265
Set<MetadataEntity> entities = Set.of(
266
MetadataEntity.ofDataset(NamespaceId.DEFAULT, "dataset1"),
267
MetadataEntity.ofDataset(NamespaceId.DEFAULT, "dataset2")
268
);
269
270
// Get metadata as it existed 24 hours ago
271
long yesterday = System.currentTimeMillis() - (24 * 60 * 60 * 1000);
272
Set<Metadata> historicalMetadata = metadataDataset.getSnapshotBeforeTime(entities, yesterday);
273
274
for (Metadata historical : historicalMetadata) {
275
System.out.println("Historical properties: " + historical.getProperties());
276
System.out.println("Historical tags: " + historical.getTags());
277
}
278
279
// Compare with current metadata
280
for (MetadataEntity entity : entities) {
281
Metadata current = metadataDataset.getMetadata(entity);
282
// Compare current vs historical...
283
}
284
```
285
286
### Custom Indexing and Performance
287
288
```java
289
// Rebuild indexes for performance optimization
290
// Process in batches to avoid memory issues
291
byte[] startRowKey = null;
292
int batchSize = 1000;
293
294
do {
295
MetadataChange indexRebuild = metadataDataset.rebuildIndexes(startRowKey, batchSize);
296
System.out.println("Rebuilt indexes for batch");
297
298
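// Advance to the next batch. getNextBatchStartKey() is an application-supplied placeholder (not part of MetadataDataset); it must return null once all rows have been processed so that the loop terminates.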
299
startRowKey = getNextBatchStartKey();
300
} while (startRowKey != null);
301
302
// Delete all indexes (typically for maintenance)
303
metadataDataset.deleteAllIndexes(1000);
304
```
305
306
### Batch Metadata Operations
307
308
```java
309
// Efficient batch metadata retrieval
310
Set<MetadataEntity> entities = Set.of(
311
MetadataEntity.ofDataset(NamespaceId.DEFAULT, "dataset1"),
312
MetadataEntity.ofDataset(NamespaceId.DEFAULT, "dataset2"),
313
MetadataEntity.ofDataset(NamespaceId.DEFAULT, "dataset3")
314
);
315
316
Set<Metadata> batchResults = metadataDataset.getMetadata(entities);
317
for (Metadata result : batchResults) {
318
System.out.println("Entity metadata: " + result);
319
}
320
321
// Batch property updates
322
for (MetadataEntity entity : entities) {
323
Map<String, String> properties = Map.of(
324
"batch_processed", "true",
325
"processed_time", String.valueOf(System.currentTimeMillis())
326
);
327
metadataDataset.setProperty(entity, properties);
328
}
329
```
330
331
## Types
332
333
```java { .api }
334
// Core metadata structures
335
public final class Metadata {
336
public MetadataEntity getEntity();
337
public Map<String, String> getProperties();
338
public Set<String> getTags();
339
public MetadataScope getScope();
340
}
341
342
public final class MetadataEntry {
343
public String getKey();
344
public String getValue();
345
public MetadataKind getKind();
346
}
347
348
public final class MetadataChange {
349
public Metadata getBefore();
350
public Metadata getAfter();
351
}
352
353
public final class MetadataKey {
354
public MetadataEntity getEntity();
355
public String getKey();
356
public MetadataKind getKind();
357
}
358
359
// Entity identification
360
public interface MetadataEntity {
361
EntityType getType();
362
String getValue();
363
364
// Factory methods for common entity types
365
public static MetadataEntity ofDataset(NamespaceId namespaceId, String dataset);
366
public static MetadataEntity ofApplication(ApplicationId applicationId);
367
public static MetadataEntity ofProgram(ProgramId programId);
368
public static MetadataEntity ofStream(StreamId streamId);
369
}
370
371
// Metadata scopes
372
public enum MetadataScope {
373
USER, // User-defined metadata
374
SYSTEM // System-generated metadata
375
}
376
377
// Metadata kinds
378
public enum MetadataKind {
379
PROPERTY,
380
TAG
381
}
382
383
// Entity types for search filtering
384
public enum EntityTypeSimpleName {
385
DATASET,
386
APPLICATION,
387
PROGRAM,
388
STREAM,
389
VIEW,
390
NAMESPACE,
391
ARTIFACT
392
}
393
394
// Exception types
395
public class MetadataException extends Exception {
396
public MetadataException(String message);
397
public MetadataException(String message, Throwable cause);
398
}
399
400
public class BadRequestException extends Exception {
401
public BadRequestException(String message);
402
public BadRequestException(String message, Throwable cause);
403
}
404
```