or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

audit-compliance.md dataset-management.md index.md metadata-management.md namespace-management.md stream-processing.md transaction-management.md usage-registry.md

docs/metadata-management.md

0

# Metadata Management

1

2

A complete metadata management system for properties, tags, search, and indexing, with support for custom indexing strategies and historical snapshots. The `MetadataDataset` class provides comprehensive metadata operations for any entity in the CDAP system, with efficient search and retrieval capabilities.

3

4

## Capabilities

5

6

### Core Metadata Operations

7

8

`MetadataDataset` is the primary API for all metadata management operations. It provides complete create, read, update, and delete (CRUD) operations for properties and tags.

9

10

```java { .api }

11

public class MetadataDataset extends AbstractDataset {

12

// Property Management

13

public MetadataChange setProperty(MetadataEntity metadataEntity, String key, String value);

14

public MetadataChange setProperty(MetadataEntity metadataEntity, Map<String, String> properties);

15

public Map<String, String> getProperties(MetadataEntity metadataEntity);

16

public String getProperty(MetadataEntity metadataEntity, String key);

17

public MetadataChange removeProperties(MetadataEntity metadataEntity, Set<String> keys);

18

public MetadataChange removeProperties(MetadataEntity metadataEntity);

19

20

// Tag Management

21

public MetadataChange addTags(MetadataEntity metadataEntity, Set<String> tagsToAdd);

22

public Set<String> getTags(MetadataEntity metadataEntity);

23

public MetadataChange removeTags(MetadataEntity metadataEntity, Set<String> tagsToRemove);

24

public MetadataChange removeTags(MetadataEntity metadataEntity);

25

26

// Complete Metadata Retrieval

27

public Metadata getMetadata(MetadataEntity metadataEntity);

28

public Set<Metadata> getMetadata(Set<MetadataEntity> metadataEntities);

29

30

// Search Operations

31

public SearchResults search(SearchRequest request) throws BadRequestException;

32

33

// Historical Operations

34

public Set<Metadata> getSnapshotBeforeTime(Set<MetadataEntity> metadataEntitys, long timeMillis);

35

36

// Index Management

37

public MetadataChange rebuildIndexes(byte[] startRowKey, int limit);

38

public void deleteAllIndexes(int limit);

39

}

40

```

41

42

### Metadata Store Interface

43

44

`MetadataStore` is a high-level interface for metadata operations. It provides scope-based metadata management for both system and user metadata, with comprehensive CRUD operations.

45

46

```java { .api }

47

public interface MetadataStore {

48

// Property Management

49

void setProperties(MetadataScope scope, MetadataEntity metadataEntity, Map<String, String> properties);

50

void setProperty(MetadataScope scope, MetadataEntity metadataEntity, String key, String value);

51

Map<String, String> getProperties(MetadataEntity metadataEntity);

52

Map<String, String> getProperties(MetadataScope scope, MetadataEntity metadataEntity);

53

void removeProperties(MetadataScope scope, MetadataEntity metadataEntity);

54

void removeProperties(MetadataScope scope, MetadataEntity metadataEntity, Set<String> keys);

55

56

// Tag Management

57

void addTags(MetadataScope scope, MetadataEntity metadataEntity, Set<String> tagsToAdd);

58

Set<String> getTags(MetadataEntity metadataEntity);

59

Set<String> getTags(MetadataScope scope, MetadataEntity metadataEntity);

60

void removeTags(MetadataScope scope, MetadataEntity metadataEntity);

61

void removeTags(MetadataScope scope, MetadataEntity metadataEntity, Set<String> tagsToRemove);

62

63

// Complete Metadata Operations

64

Set<MetadataRecordV2> getMetadata(MetadataEntity metadataEntity);

65

MetadataRecordV2 getMetadata(MetadataScope scope, MetadataEntity metadataEntity);

66

Set<MetadataRecordV2> getMetadata(MetadataScope scope, Set<MetadataEntity> metadataEntitys);

67

void removeMetadata(MetadataEntity metadataEntity);

68

void removeMetadata(MetadataScope scope, MetadataEntity metadataEntity);

69

70

// Search Operations

71

MetadataSearchResponseV2 search(SearchRequest request);

72

73

// Historical Operations

74

Set<MetadataRecordV2> getSnapshotBeforeTime(MetadataScope scope, Set<MetadataEntity> metadataEntitys, long timeMillis);

75

76

// Administrative Operations

77

void rebuildIndexes(MetadataScope scope, RetryStrategy retryStrategy);

78

void createOrUpgrade(MetadataScope scope) throws DatasetManagementException, IOException;

79

}

80

```

81

82

### Metadata Indexing

83

84

Pluggable indexing strategies for efficient metadata search and retrieval across different data access patterns.

85

86

```java { .api }

87

// Base indexing interface

88

public interface Indexer {

89

Set<String> getIndexes(MetadataEntry entry);

90

}

91

92

// Standard indexing implementations

93

public class DefaultValueIndexer implements Indexer {

94

// Standard value-based indexing for exact matches

95

}

96

97

public class SchemaIndexer implements Indexer {

98

// Schema-aware indexing for structured metadata

99

}

100

101

public class InvertedValueIndexer implements Indexer {

102

// Reverse indexing for efficient range queries and sorting

103

}

104

105

public class InvertedTimeIndexer implements Indexer {

106

// Time-based reverse indexing for temporal queries

107

}

108

109

public class ValueOnlyIndexer implements Indexer {

110

// Value-only indexing without key information

111

}

112

113

public class MetadataEntityTypeIndexer implements Indexer {

114

// Entity type-based indexing for type-specific queries

115

}

116

```

117

118

### Search and Query Operations

119

120

Comprehensive search capabilities with flexible query parameters, sorting, and pagination support.

121

122

```java { .api }

123

// Search request configuration

124

public class SearchRequest {

125

public static Builder builder();

126

public String getQuery();

127

public Set<MetadataScope> getScopes();

128

public Set<EntityTypeSimpleName> getTypes();

129

public SortInfo getSortInfo();

130

public int getOffset();

131

public int getLimit();

132

public boolean shouldShowHidden();

133

public Set<String> getCursorRequiredFields();

134

135

public static class Builder {

136

public Builder setQuery(String query);

137

public Builder setScopes(Set<MetadataScope> scopes);

138

public Builder setTypes(Set<EntityTypeSimpleName> types);

139

public Builder setSortInfo(SortInfo sortInfo);

140

public Builder setOffset(int offset);

141

public Builder setLimit(int limit);

142

public Builder setShowHidden(boolean showHidden);

143

public Builder setCursorRequiredFields(Set<String> fields);

144

public SearchRequest build();

145

}

146

}

147

148

// Search results with pagination

149

public class SearchResults {

150

public List<MetadataSearchResultRecord> getResults();

151

public String getCursor();

152

public int getTotal();

153

public boolean hasMore();

154

}

155

156

// Search result record structure

157

public class MetadataSearchResultRecord {

158

public MetadataEntity getMetadataEntity();

159

public Metadata getMetadata();

160

}

161

162

// Sorting configuration

163

public class SortInfo {

164

public String getSortBy();

165

public SortOrder getSortOrder();

166

167

public enum SortOrder {

168

ASC, DESC

169

}

170

}

171

```

172

173

## Usage Examples

174

175

### Basic Property and Tag Management

176

177

```java

178

// Access metadata dataset (typically injected)

179

MetadataDataset metadataDataset = // ... obtain instance

180

181

// Define entity to operate on

182

MetadataEntity entity = MetadataEntity.ofDataset(NamespaceId.DEFAULT, "userProfiles");

183

184

// Set properties

185

Map<String, String> properties = Map.of(

186

"environment", "production",

187

"owner", "team-alpha",

188

"created", "2023-01-15",

189

"format", "parquet",

190

"compression", "snappy"

191

);

192

193

MetadataChange change = metadataDataset.setProperty(entity, properties);

194

System.out.println("Properties added: " + change.getAfter().getProperties());

195

196

// Add tags

197

Set<String> tags = Set.of("production", "critical", "team-alpha", "analytics");

198

metadataDataset.addTags(entity, tags);

199

200

// Retrieve complete metadata

201

Metadata metadata = metadataDataset.getMetadata(entity);

202

System.out.println("Properties: " + metadata.getProperties());

203

System.out.println("Tags: " + metadata.getTags());

204

205

// Update specific property

206

metadataDataset.setProperty(entity, "last_updated", "2023-06-20");

207

208

// Remove specific tags

209

metadataDataset.removeTags(entity, Set.of("analytics"));

210

```

211

212

### Advanced Search Operations

213

214

```java

215

// Search by property value

216

SearchRequest request = SearchRequest.builder()

217

.setQuery("properties:environment:production")

218

.setTypes(Set.of(EntityTypeSimpleName.DATASET))

219

.setLimit(50)

220

.build();

221

222

SearchResults results = metadataDataset.search(request);

223

for (MetadataSearchResultRecord record : results.getResults()) {

224

System.out.println("Dataset: " + record.getMetadataEntity());

225

System.out.println("Environment: " + record.getMetadata().getProperties().get("environment"));

226

}

227

228

// Search by tags with sorting

229

SearchRequest tagSearch = SearchRequest.builder()

230

.setQuery("tags:critical")

231

.setSortInfo(new SortInfo("entity_name", SortInfo.SortOrder.ASC))

232

.setOffset(0)

233

.setLimit(100)

234

.build();

235

236

SearchResults tagResults = metadataDataset.search(tagSearch);

237

238

// Complex query combining properties and tags

239

SearchRequest complexSearch = SearchRequest.builder()

240

.setQuery("(properties:owner:team-alpha) AND (tags:production)")

241

.setTypes(Set.of(EntityTypeSimpleName.DATASET, EntityTypeSimpleName.APPLICATION))

242

.build();

243

244

SearchResults complexResults = metadataDataset.search(complexSearch);

245

246

// Paginated search with cursor

247

String cursor = null;

248

do {

249

SearchRequest paginatedRequest = SearchRequest.builder()

250

.setQuery("tags:analytics")

251

.setLimit(20)

252

.setCursorRequiredFields(Set.of("entity_name"))

253

.build();

254

255

SearchResults page = metadataDataset.search(paginatedRequest);

256

processResults(page.getResults());

257

cursor = page.getCursor();

258

} while (cursor != null);

259

```

260

261

### Historical Metadata Operations

262

263

```java

264

// Get historical snapshot of metadata

265

Set<MetadataEntity> entities = Set.of(

266

MetadataEntity.ofDataset(NamespaceId.DEFAULT, "dataset1"),

267

MetadataEntity.ofDataset(NamespaceId.DEFAULT, "dataset2")

268

);

269

270

// Get metadata as it existed 24 hours ago

271

long yesterday = System.currentTimeMillis() - (24 * 60 * 60 * 1000);

272

Set<Metadata> historicalMetadata = metadataDataset.getSnapshotBeforeTime(entities, yesterday);

273

274

for (Metadata historical : historicalMetadata) {

275

System.out.println("Historical properties: " + historical.getProperties());

276

System.out.println("Historical tags: " + historical.getTags());

277

}

278

279

// Compare with current metadata

280

for (MetadataEntity entity : entities) {

281

Metadata current = metadataDataset.getMetadata(entity);

282

// Compare current vs historical...

283

}

284

```

285

286

### Custom Indexing and Performance

287

288

```java

289

// Rebuild indexes for performance optimization

290

// Process in batches to avoid memory issues

291

byte[] startRowKey = null;

292

int batchSize = 1000;

293

294

do {

295

MetadataChange indexRebuild = metadataDataset.rebuildIndexes(startRowKey, batchSize);

296

System.out.println("Rebuilt indexes for batch");

297

298

// Update startRowKey for next batch based on processing

299

startRowKey = getNextBatchStartKey();

300

} while (startRowKey != null);

301

302

// Delete all indexes (typically for maintenance)

303

metadataDataset.deleteAllIndexes(1000);

304

```

305

306

### Batch Metadata Operations

307

308

```java

309

// Efficient batch metadata retrieval

310

Set<MetadataEntity> entities = Set.of(

311

MetadataEntity.ofDataset(NamespaceId.DEFAULT, "dataset1"),

312

MetadataEntity.ofDataset(NamespaceId.DEFAULT, "dataset2"),

313

MetadataEntity.ofDataset(NamespaceId.DEFAULT, "dataset3")

314

);

315

316

Set<Metadata> batchResults = metadataDataset.getMetadata(entities);

317

for (Metadata result : batchResults) {

318

System.out.println("Entity metadata: " + result);

319

}

320

321

// Batch property updates

322

for (MetadataEntity entity : entities) {

323

Map<String, String> properties = Map.of(

324

"batch_processed", "true",

325

"processed_time", String.valueOf(System.currentTimeMillis())

326

);

327

metadataDataset.setProperty(entity, properties);

328

}

329

```

330

331

## Types

332

333

```java { .api }

334

// Core metadata structures

335

public final class Metadata {

336

public MetadataEntity getEntity();

337

public Map<String, String> getProperties();

338

public Set<String> getTags();

339

public MetadataScope getScope();

340

}

341

342

public final class MetadataEntry {

343

public String getKey();

344

public String getValue();

345

public MetadataKind getKind();

346

}

347

348

public final class MetadataChange {

349

public Metadata getBefore();

350

public Metadata getAfter();

351

}

352

353

public final class MetadataKey {

354

public MetadataEntity getEntity();

355

public String getKey();

356

public MetadataKind getKind();

357

}

358

359

// Entity identification

360

public interface MetadataEntity {

361

EntityType getType();

362

String getValue();

363

364

// Factory methods for common entity types

365

public static MetadataEntity ofDataset(NamespaceId namespaceId, String dataset);

366

public static MetadataEntity ofApplication(ApplicationId applicationId);

367

public static MetadataEntity ofProgram(ProgramId programId);

368

public static MetadataEntity ofStream(StreamId streamId);

369

}

370

371

// Metadata scopes

372

public enum MetadataScope {

373

USER, // User-defined metadata

374

SYSTEM // System-generated metadata

375

}

376

377

// Metadata kinds

378

public enum MetadataKind {

379

PROPERTY,

380

TAG

381

}

382

383

// Entity types for search filtering

384

public enum EntityTypeSimpleName {

385

DATASET,

386

APPLICATION,

387

PROGRAM,

388

STREAM,

389

VIEW,

390

NAMESPACE,

391

ARTIFACT

392

}

393

394

// Exception types

395

public class MetadataException extends Exception {

396

public MetadataException(String message);

397

public MetadataException(String message, Throwable cause);

398

}

399

400

public class BadRequestException extends Exception {

401

public BadRequestException(String message);

402

public BadRequestException(String message, Throwable cause);

403

}

404

```