tessl/maven-org-apache-flink--flink-ml-uber-2-11

A comprehensive machine learning library for Apache Flink that enables scalable ML pipelines on a distributed stream processing platform.

—

Pending

Overview

Eval results

Files

Environment Management

Name: tessl/maven-org-apache-flink--flink-ml-uber-2-11
Author: tessl

ML execution context management for Flink batch and stream environments. Provides centralized access to execution contexts and table environments with support for multiple concurrent ML environments.

Capabilities

MLEnvironment Class

Core class that stores Flink execution contexts for both batch and stream processing.

/**
 * Stores Flink execution contexts for ML operations.
 *
 * <p>Provides access to both batch and stream environments. An instance can be
 * created with default settings, with batch-only contexts, with stream-only
 * contexts, or with all four contexts supplied explicitly.
 *
 * <p>NOTE(review): this listing shows signatures only. What a getter returns for a
 * context that was not supplied at construction time is not visible here; confirm
 * against the implementation before relying on it.
 */
public class MLEnvironment {
    
    /** Create ML environment with default settings. */
    public MLEnvironment();
    
    /**
     * Create ML environment with batch-only contexts.
     *
     * @param batchEnv      the batch execution environment to store
     * @param batchTableEnv the batch table environment to store
     */
    public MLEnvironment(ExecutionEnvironment batchEnv, 
                        BatchTableEnvironment batchTableEnv);
    
    /**
     * Create ML environment with stream-only contexts.
     *
     * @param streamEnv      the stream execution environment to store
     * @param streamTableEnv the stream table environment to store
     */
    public MLEnvironment(StreamExecutionEnvironment streamEnv, 
                        StreamTableEnvironment streamTableEnv);
    
    /**
     * Create ML environment with both batch and stream contexts.
     *
     * @param batchEnv       the batch execution environment to store
     * @param batchTableEnv  the batch table environment to store
     * @param streamEnv      the stream execution environment to store
     * @param streamTableEnv the stream table environment to store
     */
    public MLEnvironment(ExecutionEnvironment batchEnv, 
                        BatchTableEnvironment batchTableEnv,
                        StreamExecutionEnvironment streamEnv, 
                        StreamTableEnvironment streamTableEnv);
    
    /**
     * Get batch execution environment.
     *
     * @return the batch {@code ExecutionEnvironment} of this ML environment
     */
    public ExecutionEnvironment getExecutionEnvironment();
    
    /**
     * Get stream execution environment.
     *
     * @return the {@code StreamExecutionEnvironment} of this ML environment
     */
    public StreamExecutionEnvironment getStreamExecutionEnvironment();
    
    /**
     * Get batch table environment.
     *
     * @return the {@code BatchTableEnvironment} of this ML environment
     */
    public BatchTableEnvironment getBatchTableEnvironment();
    
    /**
     * Get stream table environment.
     *
     * @return the {@code StreamTableEnvironment} of this ML environment
     */
    public StreamTableEnvironment getStreamTableEnvironment();
}

Usage Examples:

import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.ml.common.MLEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.BatchTableEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

// Create the four Flink contexts that the custom ML environment will hold:
// a batch and a stream execution environment, plus a table environment built on each.
ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();
StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
BatchTableEnvironment batchTableEnv = BatchTableEnvironment.create(batchEnv);
StreamTableEnvironment streamTableEnv = StreamTableEnvironment.create(streamEnv);

// Bundle all four contexts into a single ML environment.
MLEnvironment mlEnv = new MLEnvironment(batchEnv, batchTableEnv, streamEnv, streamTableEnv);

// Access environments through the ML environment's getters.
ExecutionEnvironment batchExecEnv = mlEnv.getExecutionEnvironment();
StreamExecutionEnvironment streamExecEnv = mlEnv.getStreamExecutionEnvironment();
BatchTableEnvironment batchTblEnv = mlEnv.getBatchTableEnvironment();
StreamTableEnvironment streamTblEnv = mlEnv.getStreamTableEnvironment();

// Use for ML operations. The arguments are placeholders: substitute your own
// DataSet / DataStream before compiling.
Table batchData = batchTblEnv.fromDataSet(/* dataset */);
Table streamData = streamTblEnv.fromDataStream(/* datastream */);

MLEnvironmentFactory Class

Factory class for managing multiple ML environments with unique identifiers.

/**
 * Factory for managing MLEnvironment instances.
 *
 * <p>Supports multiple concurrent ML environments, each identified by a unique
 * {@code Long} ID. All members are static.
 *
 * <p>NOTE(review): this listing shows signatures only. Behavior for an ID that
 * was never registered (for {@link #get(Long)} and {@link #remove(Long)}) is not
 * visible here; confirm against the implementation.
 */
public class MLEnvironmentFactory {
    
    /** Default ML environment ID ({@code 0L}). */
    public static final Long DEFAULT_ML_ENVIRONMENT_ID = 0L;
    
    /**
     * Get ML environment by ID.
     *
     * @param mlEnvId the ID of the ML environment to look up
     * @return the ML environment for {@code mlEnvId}
     */
    public static MLEnvironment get(Long mlEnvId);
    
    /**
     * Get default ML environment.
     *
     * @return the default ML environment (presumably the one associated with
     *         {@link #DEFAULT_ML_ENVIRONMENT_ID}; confirm)
     */
    public static MLEnvironment getDefault();
    
    /**
     * Generate new unique ML environment ID.
     *
     * @return a new unique ML environment ID
     */
    public static Long getNewMLEnvironmentId();
    
    /**
     * Register ML environment and return its ID.
     *
     * @param env the ML environment to register
     * @return the ID under which {@code env} was registered
     */
    public static Long registerMLEnvironment(MLEnvironment env);
    
    /**
     * Remove ML environment and return removed instance.
     *
     * @param mlEnvId the ID of the ML environment to remove
     * @return the removed ML environment instance
     */
    public static MLEnvironment remove(Long mlEnvId);
}

Usage Examples:

import org.apache.flink.ml.common.MLEnvironment;
import org.apache.flink.ml.common.MLEnvironmentFactory;

// Use default environment
MLEnvironment defaultEnv = MLEnvironmentFactory.getDefault();

// Create and register custom environment; registration returns the ID
// that identifies this environment in later factory calls.
MLEnvironment customEnv = new MLEnvironment(/* custom settings */);
Long customEnvId = MLEnvironmentFactory.registerMLEnvironment(customEnv);

// Retrieve registered environment by the ID returned from registration
MLEnvironment retrieved = MLEnvironmentFactory.get(customEnvId);

// Generate new environment ID for manual management
Long newId = MLEnvironmentFactory.getNewMLEnvironmentId();

// Remove environment when done; the removed instance is returned
MLEnvironment removed = MLEnvironmentFactory.remove(customEnvId);

HasMLEnvironmentId Interface

Parameter interface for ML components that need to specify which environment to use.

/**
 * Parameter mixin that lets an ML component declare which ML environment it uses.
 *
 * <p>The environment is identified by a {@code Long} ID stored under
 * {@link #ML_ENVIRONMENT_ID}; the declared default is
 * {@code MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID}.
 *
 * @param <T> the implementing class type, returned by the setter for method chaining
 */
public interface HasMLEnvironmentId<T> extends WithParams<T> {

    /** Parameter descriptor for the ML environment ID (name {@code "mlEnvironmentId"}). */
    ParamInfo<Long> ML_ENVIRONMENT_ID =
        ParamInfoFactory.createParamInfo("mlEnvironmentId", Long.class)
            .setDescription("ML environment ID")
            .setHasDefaultValue(MLEnvironmentFactory.DEFAULT_ML_ENVIRONMENT_ID)
            .build();

    /**
     * Reads the ML environment ID parameter.
     *
     * @return the value stored under {@link #ML_ENVIRONMENT_ID}
     */
    default Long getMLEnvironmentId() {
        final Long environmentId = get(ML_ENVIRONMENT_ID);
        return environmentId;
    }

    /**
     * Writes the ML environment ID parameter.
     *
     * @param value the ML environment ID to store
     * @return whatever {@code set} returns, typed as {@code T}
     */
    default T setMLEnvironmentId(Long value) {
        final T updated = set(ML_ENVIRONMENT_ID, value);
        return updated;
    }
}

Usage Examples:

/**
 * Example ML component that selects its Flink contexts through an ML environment ID.
 *
 * <p>Implements {@code HasMLEnvironmentId} so callers can choose the environment
 * with {@code setMLEnvironmentId(...)} before training.
 */
public class MyMLAlgorithm extends EstimatorBase<MyMLAlgorithm, MyMLModel>
        implements HasMLEnvironmentId<MyMLAlgorithm> {

    /**
     * Trains on {@code input} using the ML environment selected by this component's ID.
     *
     * @param input the batch operator supplying the training data
     * @return a new {@code MyMLModel} built from this component's parameters
     */
    @Override
    protected MyMLModel fit(BatchOperator input) {
        // Look up the environment registered under the configured ID.
        MLEnvironment environment = MLEnvironmentFactory.get(getMLEnvironmentId());

        // Batch table environment of that ML environment, for use by the training logic.
        BatchTableEnvironment batchTableEnv = environment.getBatchTableEnvironment();

        // Training logic using the specified environment
        // ...

        MyMLModel trained = new MyMLModel(this.getParams());
        return trained;
    }
}

// Usage with specific environment.
// `customEnvId` and `trainingData` are not defined in this snippet; presumably
// `customEnvId` is an ID returned by MLEnvironmentFactory.registerMLEnvironment(...)
// and `trainingData` is the caller's input data. Confirm before copying.
// NOTE(review): setMaxIter(...) is not declared by MyMLAlgorithm or
// HasMLEnvironmentId as shown on this page; it presumably comes from another
// parameter interface the algorithm would also have to implement. Confirm.
MyMLAlgorithm algorithm = new MyMLAlgorithm()
    .setMLEnvironmentId(customEnvId)
    .setMaxIter(100);

// NOTE(review): the fit(BatchOperator) shown for MyMLAlgorithm is protected, so this
// call presumably resolves to a public fit overload inherited from EstimatorBase. Confirm.
MyMLModel model = algorithm.fit(trainingData);