CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-co-cask-cdap--cdap-explore-client

Java client interfaces for exploring and querying CDAP datasets using Apache Hive SQL with asynchronous execution and metadata operations.

Pending
Overview
Eval results
Files

docs/dataset-management.md

Dataset Management

Enable and disable exploration capabilities on CDAP datasets, manage partitions, handle dataset lifecycle operations, and integrate with stream processing. Provides comprehensive dataset exploration management for both simple datasets and complex partitioned data structures.

Capabilities

Dataset Exploration Control

Enable and disable exploration capabilities for CDAP datasets with various configuration options.

/**
 * ExploreClient operations for switching dataset exploration on and off.
 * All calls are asynchronous and report completion through a Guava future.
 */
interface ExploreClient {
    /**
     * Turns on exploration for the given dataset using default configuration.
     *
     * @param datasetInstance the dataset whose exploration should be enabled
     * @return a future that completes once the dataset is explorable
     */
    ListenableFuture<Void> enableExploreDataset(DatasetId datasetInstance);
    
    /**
     * Turns on exploration for the given dataset with an explicit specification.
     *
     * @param datasetInstance the dataset whose exploration should be enabled
     * @param spec the specification carrying the dataset's schema and properties
     * @param truncating true if existing data may be truncated while enabling
     * @return a future that completes once the dataset is explorable
     */
    ListenableFuture<Void> enableExploreDataset(DatasetId datasetInstance, 
                                               DatasetSpecification spec, boolean truncating);
    
    /**
     * Reconfigures exploration for a dataset whose specification has changed.
     *
     * @param datasetInstance the dataset to reconfigure
     * @param oldSpec the specification that was previously in effect
     * @param newSpec the specification to apply going forward
     * @return a future that completes once the update has been applied
     */
    ListenableFuture<Void> updateExploreDataset(DatasetId datasetInstance, 
                                               DatasetSpecification oldSpec, 
                                               DatasetSpecification newSpec);
    
    /**
     * Turns off exploration for the given dataset using default configuration.
     *
     * @param datasetInstance the dataset whose exploration should be disabled
     * @return a future that completes once exploration has been disabled
     */
    ListenableFuture<Void> disableExploreDataset(DatasetId datasetInstance);
    
    /**
     * Turns off exploration for the given dataset with an explicit specification.
     *
     * @param datasetInstance the dataset whose exploration should be disabled
     * @param spec the specification describing the dataset being disabled
     * @return a future that completes once exploration has been disabled
     */
    ListenableFuture<Void> disableExploreDataset(DatasetId datasetInstance, 
                                                DatasetSpecification spec);
}

/**
 * Synchronous, simplified counterpart to ExploreClient for dataset exploration.
 * Signature listing only — method bodies are omitted in this documentation.
 * Unlike ExploreClient, these calls block and surface failures as ExploreException.
 */
class ExploreFacade {
    /**
     * Enables exploration for a dataset, checking configuration automatically.
     *
     * @param datasetInstance the dataset whose exploration should be enabled
     * @throws ExploreException if enabling exploration fails
     */
    void enableExploreDataset(DatasetId datasetInstance) throws ExploreException;
    
    /**
     * Enables exploration for a dataset with an explicit specification.
     *
     * @param datasetInstance the dataset whose exploration should be enabled
     * @param spec the specification carrying the dataset's schema and properties
     * @param truncating true if existing data may be truncated while enabling
     * @throws ExploreException if enabling exploration fails
     */
    void enableExploreDataset(DatasetId datasetInstance, DatasetSpecification spec, 
                             boolean truncating) throws ExploreException;
    
    /**
     * Reconfigures exploration for a dataset whose specification has changed.
     *
     * @param datasetInstance the dataset to reconfigure
     * @param oldSpec the specification that was previously in effect
     * @param newSpec the specification to apply going forward
     * @throws ExploreException if the update fails
     */
    void updateExploreDataset(DatasetId datasetInstance, DatasetSpecification oldSpec, 
                             DatasetSpecification newSpec) throws ExploreException;
    
    /**
     * Disables exploration for a dataset.
     *
     * @param datasetInstance the dataset whose exploration should be disabled
     * @throws ExploreException if disabling exploration fails
     */
    void disableExploreDataset(DatasetId datasetInstance) throws ExploreException;
    
    /**
     * Disables exploration for a dataset with an explicit specification.
     *
     * @param datasetInstance the dataset whose exploration should be disabled
     * @param spec the specification describing the dataset being disabled
     * @throws ExploreException if disabling exploration fails
     */
    void disableExploreDataset(DatasetId datasetInstance, DatasetSpecification spec) 
        throws ExploreException;
}

Partition Management

Manage partitions for partitioned datasets including adding, dropping, and concatenating partitions.

/**
 * Asynchronous partition lifecycle operations for explorable partitioned datasets.
 */
interface ExploreClient {
    /**
     * Registers a new partition with the Hive table backing a partitioned dataset.
     *
     * @param datasetInstance the partitioned dataset receiving the partition
     * @param spec the specification describing the dataset
     * @param key the key that uniquely identifies the new partition
     * @param path the filesystem location holding the partition's data
     * @return a future that completes once the partition has been registered
     */
    ListenableFuture<Void> addPartition(DatasetId datasetInstance, DatasetSpecification spec, 
                                       PartitionKey key, String path);
    
    /**
     * Removes a partition registration from a partitioned dataset.
     *
     * @param datasetInstance the partitioned dataset losing the partition
     * @param spec the specification describing the dataset
     * @param key the key identifying the partition to remove
     * @return a future that completes once the partition has been dropped
     */
    ListenableFuture<Void> dropPartition(DatasetId datasetInstance, DatasetSpecification spec, 
                                        PartitionKey key);
    
    /**
     * Merges the files of a partition to reduce fragmentation.
     *
     * @param datasetInstance the dataset that owns the partition
     * @param spec the specification describing the dataset
     * @param key the key identifying the partition to concatenate
     * @return a future that completes once concatenation has finished
     */
    ListenableFuture<Void> concatenatePartition(DatasetId datasetInstance, DatasetSpecification spec, 
                                              PartitionKey key);
}

/**
 * Partition operations exposed through the simplified ExploreFacade.
 * Signature listing only — method bodies are omitted in this documentation.
 */
class ExploreFacade {
    /**
     * Registers a new partition, resolving exploration configuration automatically.
     *
     * @param datasetInstance the partitioned dataset receiving the partition
     * @param spec the specification describing the dataset
     * @param key the key that uniquely identifies the new partition
     * @param location the filesystem location holding the partition's data
     * @throws ExploreException if the partition cannot be added
     */
    void addPartition(DatasetId datasetInstance, DatasetSpecification spec, 
                     PartitionKey key, String location) throws ExploreException;
    
    /**
     * Removes a partition registration, resolving configuration automatically.
     *
     * @param datasetInstance the partitioned dataset losing the partition
     * @param spec the specification describing the dataset
     * @param key the key identifying the partition to remove
     * @throws ExploreException if the partition cannot be dropped
     */
    void dropPartition(DatasetId datasetInstance, DatasetSpecification spec, 
                      PartitionKey key) throws ExploreException;
    
    /**
     * Merges the files of a partition to reduce fragmentation.
     * NOTE(review): unlike the other facade methods, this returns a future
     * rather than blocking — callers must wait on it; confirm against the
     * underlying implementation.
     *
     * @param datasetInstance the dataset that owns the partition
     * @param spec the specification describing the dataset
     * @param key the key identifying the partition to concatenate
     * @return a future that completes once concatenation has finished
     * @throws ExploreException if the concatenation cannot be started
     */
    ListenableFuture<Void> concatenatePartition(DatasetId datasetInstance, DatasetSpecification spec, 
                                              PartitionKey key) throws ExploreException;
}

Stream Integration

Enable and manage exploration for CDAP streams with format specifications.

/**
 * Asynchronous operations for exposing CDAP streams to SQL exploration.
 */
interface ExploreClient {
    /**
     * Makes a stream queryable by creating a Hive table over it.
     *
     * @param stream the stream to expose for exploration
     * @param tableName the name of the table to create for the stream
     * @param format how the raw stream events should be parsed into columns
     * @return a future that completes once the stream is explorable
     */
    ListenableFuture<Void> enableExploreStream(StreamId stream, String tableName, 
                                              FormatSpecification format);
    
    /**
     * Removes a stream's exploration table.
     *
     * @param stream the stream to stop exposing for exploration
     * @param tableName the name of the table that backs the stream
     * @return a future that completes once exploration has been disabled
     */
    ListenableFuture<Void> disableExploreStream(StreamId stream, String tableName);
}

/**
 * Synchronous stream-exploration operations on the simplified facade.
 * Signature listing only — method bodies are omitted in this documentation.
 */
class ExploreFacade {
    /**
     * Makes a stream queryable, blocking until the table exists.
     *
     * @param stream the stream to expose for exploration
     * @param tableName the name of the table to create for the stream
     * @param format how the raw stream events should be parsed into columns
     * @throws ExploreException if the stream cannot be enabled
     */
    void enableExploreStream(StreamId stream, String tableName, 
                           FormatSpecification format) throws ExploreException;
    
    /**
     * Removes a stream's exploration table, blocking until it is gone.
     *
     * @param stream the stream to stop exposing for exploration
     * @param tableName the name of the table that backs the stream
     * @throws ExploreException if the stream cannot be disabled
     */
    void disableExploreStream(StreamId stream, String tableName) throws ExploreException;
}

Namespace Management

Create and manage namespaces for dataset organization and access control.

/**
 * Namespace lifecycle operations on the low-level Explore interface.
 * Each call returns a QueryHandle that can be used to track the operation.
 */
interface Explore {
    /**
     * Provisions a new namespace for exploration.
     *
     * @param namespaceMeta the metadata and configuration of the namespace
     * @return a handle for tracking the creation operation
     * @throws ExploreException if the namespace cannot be created
     */
    QueryHandle createNamespace(NamespaceMeta namespaceMeta) throws ExploreException;
    
    /**
     * Removes an existing namespace from exploration.
     *
     * @param namespace the namespace to remove
     * @return a handle for tracking the deletion operation
     * @throws ExploreException if the namespace cannot be deleted
     */
    QueryHandle deleteNamespace(NamespaceId namespace) throws ExploreException;
}

/**
 * Fully asynchronous namespace operations on ExploreClient.
 */
interface ExploreClient {
    /**
     * Provisions a namespace without blocking the caller.
     *
     * @param namespaceMeta the metadata and configuration of the namespace
     * @return a future yielding the result of the creation operation
     */
    ListenableFuture<ExploreExecutionResult> addNamespace(NamespaceMeta namespaceMeta);
    
    /**
     * Removes a namespace without blocking the caller.
     *
     * @param namespace the namespace to remove
     * @return a future yielding the result of the removal operation
     */
    ListenableFuture<ExploreExecutionResult> removeNamespace(NamespaceId namespace);
}

/**
 * Blocking namespace operations on the simplified facade.
 * Signature listing only — method bodies are omitted in this documentation.
 */
class ExploreFacade {
    /**
     * Provisions a namespace, blocking until it is ready.
     *
     * @param namespace the metadata and configuration of the namespace
     * @throws ExploreException if the namespace cannot be created
     */
    void createNamespace(NamespaceMeta namespace) throws ExploreException;
    
    /**
     * Removes a namespace and cleans up its resources, blocking until done.
     *
     * @param namespace the namespace to remove
     * @throws ExploreException if the namespace cannot be removed
     */
    void removeNamespace(NamespaceId namespace) throws ExploreException;
}

Configuration Classes

Parameter classes for dataset operations with detailed configuration options.

/**
 * Value object bundling the arguments needed to enable dataset exploration.
 * Accessor signatures only — implementations are omitted in this documentation.
 */
class EnableExploreParameters {
    /**
     * Returns the identifier of the dataset to enable.
     *
     * @return the dataset ID
     */
    public DatasetId getDatasetInstance();
    
    /**
     * Returns the specification describing the dataset.
     *
     * @return the dataset specification
     */
    public DatasetSpecification getSpec();
    
    /**
     * Reports whether existing data may be truncated while enabling.
     *
     * @return true if truncation is requested
     */
    public boolean isTruncating();
}

/**
 * Value object bundling the arguments needed to disable dataset exploration.
 * Accessor signatures only — implementations are omitted in this documentation.
 */
class DisableExploreParameters {
    /**
     * Returns the identifier of the dataset to disable.
     *
     * @return the dataset ID
     */
    public DatasetId getDatasetInstance();
    
    /**
     * Returns the specification describing the dataset.
     *
     * @return the dataset specification
     */
    public DatasetSpecification getSpec();
}

/**
 * Value object bundling the arguments needed to update dataset exploration.
 * Accessor signatures only — implementations are omitted in this documentation.
 */
class UpdateExploreParameters {
    /**
     * Returns the identifier of the dataset being updated.
     *
     * @return the dataset ID
     */
    public DatasetId getDatasetInstance();
    
    /**
     * Returns the specification that was previously in effect.
     *
     * @return the old dataset specification
     */
    public DatasetSpecification getOldSpec();
    
    /**
     * Returns the specification to apply going forward.
     *
     * @return the new dataset specification
     */
    public DatasetSpecification getNewSpec();
}

Usage Examples:

import co.cask.cdap.explore.client.ExploreClient;
import co.cask.cdap.explore.client.ExploreExecutionResult;
import co.cask.cdap.explore.client.ExploreFacade;
import co.cask.cdap.proto.QueryResult;
import co.cask.cdap.proto.id.DatasetId;
import co.cask.cdap.proto.id.NamespaceId;
import co.cask.cdap.proto.id.StreamId;
// NOTE(review): DatasetSpecification may live in co.cask.cdap.api.dataset —
// verify this package path against the artifact.
import co.cask.cdap.data2.dataset2.DatasetSpecification;
import co.cask.cdap.api.dataset.lib.PartitionKey;
import com.google.common.util.concurrent.ListenableFuture;

// Basic dataset exploration management.
// In real code the client comes from dependency injection; the placeholder
// below keeps the snippet syntactically valid Java.
ExploreClient client = /* obtained via dependency injection */ null;

try {
    DatasetId dataset = new DatasetId("default", "user_events");
    
    // Enable exploration for the dataset and block until it is ready.
    ListenableFuture<Void> enableFuture = client.enableExploreDataset(dataset);
    enableFuture.get(); // Wait for completion
    
    System.out.println("Dataset exploration enabled");
    
    // Query the dataset through the explore service.
    ListenableFuture<ExploreExecutionResult> queryFuture = 
        client.submit(new NamespaceId("default"), 
                     "SELECT COUNT(*) FROM dataset_user_events");
    
    ExploreExecutionResult result = queryFuture.get();
    if (result.hasNext()) {
        QueryResult row = result.next();
        System.out.println("Row count: " + row.getColumns().get(0));
    }
    result.close(); // release server-side query resources
    
    // Disable exploration when done.
    ListenableFuture<Void> disableFuture = client.disableExploreDataset(dataset);
    disableFuture.get();
    
} catch (Exception e) {
    System.err.println("Dataset operation failed: " + e.getMessage());
}

// Using ExploreFacade for simplified, blocking operations.
// In real code the facade comes from dependency injection; the placeholder
// below keeps the snippet syntactically valid Java.
ExploreFacade facade = /* obtained via dependency injection */ null;

try {
    DatasetId dataset = new DatasetId("default", "product_catalog");
    
    // Enable with automatic configuration checking (blocks until done).
    facade.enableExploreDataset(dataset);
    System.out.println("Explore enabled via facade");
    
    // Disable when done.
    facade.disableExploreDataset(dataset);
    
} catch (ExploreException e) {
    System.err.println("Facade operation failed: " + e.getMessage());
}

// Partition management example.
try {
    DatasetId partitionedDataset = new DatasetId("default", "daily_logs");
    // In real code the specification comes from dataset metadata; the
    // placeholder keeps the snippet syntactically valid Java.
    DatasetSpecification spec = /* obtained from dataset metadata */ null;
    
    // Build the key identifying the new partition.
    PartitionKey partitionKey = PartitionKey.builder()
        .addStringField("year", "2023")
        .addStringField("month", "09")
        .addStringField("day", "07")
        .build();
    
    String partitionPath = "/data/daily_logs/2023/09/07";
    
    // Register the partition and block until it is visible to queries.
    ListenableFuture<Void> addFuture = 
        client.addPartition(partitionedDataset, spec, partitionKey, partitionPath);
    addFuture.get();
    
    System.out.println("Partition added successfully");
    
    // Query the specific partition by its key columns.
    ListenableFuture<ExploreExecutionResult> partitionQuery = 
        client.submit(new NamespaceId("default"), 
                     "SELECT * FROM dataset_daily_logs WHERE year='2023' AND month='09' AND day='07'");
    
    ExploreExecutionResult partitionResult = partitionQuery.get();
    // Process partition results...
    partitionResult.close(); // release server-side query resources
    
    // Drop the partition when it is no longer needed.
    ListenableFuture<Void> dropFuture = 
        client.dropPartition(partitionedDataset, spec, partitionKey);
    dropFuture.get();
    
} catch (Exception e) {
    System.err.println("Partition operation failed: " + e.getMessage());
}

// Stream exploration example.
try {
    StreamId stream = new StreamId("default", "access_logs");
    String tableName = "stream_access_logs";
    
    // Describe how raw stream events are parsed into columns.
    // NOTE(review): confirm FormatSpecification exposes this builder API in
    // the target CDAP version.
    FormatSpecification format = FormatSpecification.builder("clf")
        .setSchema(Schema.recordOf("access_log",
            Schema.Field.of("ip", Schema.of(Schema.Type.STRING)),
            Schema.Field.of("timestamp", Schema.of(Schema.Type.LONG)),
            Schema.Field.of("method", Schema.of(Schema.Type.STRING)),
            Schema.Field.of("url", Schema.of(Schema.Type.STRING)),
            Schema.Field.of("status", Schema.of(Schema.Type.INT))
        ))
        .build();
    
    // Create the exploration table over the stream and wait for it.
    ListenableFuture<Void> enableFuture = 
        client.enableExploreStream(stream, tableName, format);
    enableFuture.get();
    
    System.out.println("Stream exploration enabled");
    
    // Aggregate the stream data with SQL.
    ListenableFuture<ExploreExecutionResult> queryFuture = 
        client.submit(new NamespaceId("default"), 
                     "SELECT method, COUNT(*) FROM stream_access_logs GROUP BY method");
    
    ExploreExecutionResult queryResult = queryFuture.get();
    while (queryResult.hasNext()) {
        QueryResult row = queryResult.next();
        System.out.println("Method: " + row.getColumns().get(0) + 
                          ", Count: " + row.getColumns().get(1));
    }
    queryResult.close(); // release server-side query resources
    
    // Tear down the stream's exploration table.
    ListenableFuture<Void> disableFuture = 
        client.disableExploreStream(stream, tableName);
    disableFuture.get();
    
} catch (Exception e) {
    System.err.println("Stream operation failed: " + e.getMessage());
}

Install with Tessl CLI

npx tessl i tessl/maven-co-cask-cdap--cdap-explore-client

docs

client-operations.md

dataset-management.md

index.md

metadata-operations.md

query-execution.md

tile.json