CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-tensorflow--proto

Generated Java code for TensorFlow protocol buffers, providing type-safe access to TensorFlow's structured data formats including MetaGraphDef, ConfigProto, and other core TensorFlow data structures.

Overview
Eval results
Files

distributed-runtime.mddocs/

Distributed Runtime

Service interfaces and message types for distributed TensorFlow execution across multiple devices and machines. These types enable TensorFlow to run computations on clusters and coordinate between master and worker nodes.

Capabilities

Master Service Messages

The master service coordinates distributed TensorFlow sessions and manages the overall execution flow across worker nodes.

CreateSessionRequest/Response

Creates a new TensorFlow session on the cluster.

/**
 * Request to create a new session
 */
class CreateSessionRequest {
  /** Get the computation graph */
  GraphDef getGraphDef();
  
  /** Get session configuration */
  ConfigProto getConfig();
  
  /** Get target specification (e.g., "grpc://localhost:2222") */
  String getTarget();
  
  /** Create a new builder */
  static Builder newBuilder();
  
  static class Builder {
    Builder setGraphDef(GraphDef graphDef);
    Builder setConfig(ConfigProto config);
    Builder setTarget(String target);
    CreateSessionRequest build();
  }
}

/**
 * Response with created session handle
 */
class CreateSessionResponse {
  /** Get unique session handle */
  String getSessionHandle();
  
  /** Get cluster information */
  ClusterDef getClusterDef();
  
  /** Get graph version */
  int getGraphVersion();
}

RunStepRequest/Response

Executes a computation step in a distributed session.

/**
 * Request to run a computation step
 */
class RunStepRequest {
  /** Get session handle */
  String getSessionHandle();
  
  /** Get input feed mappings (tensor name -> tensor value) */
  Map<String, TensorProto> getFeedMap();
  
  /** Get output fetch names */
  List<String> getFetchList();
  
  /** Get target operation names to run */
  List<String> getTargetList();
  
  /** Get run options */
  RunOptions getOptions();
  
  /** Get partial run handle for partial execution */
  String getPartialRunHandle();
  
  /** Check if errors should be stored in the response body instead of being returned via RPC status */
  boolean getStoreErrorsInResponseBody();
  
  /** Create a new builder */
  static Builder newBuilder();
  
  static class Builder {
    Builder setSessionHandle(String handle);
    Builder putFeed(String key, TensorProto value);
    Builder addFetch(String fetch);
    Builder addTarget(String target);
    Builder setOptions(RunOptions options);
    Builder setPartialRunHandle(String handle);
    RunStepRequest build();
  }
}

/**
 * Response with computation results
 */
class RunStepResponse {
  /** Get output tensor values */
  List<TensorProto> getTensorList();
  
  /** Get execution metadata */
  RunMetadata getMetadata();
  
  /** Get step execution statistics */
  StepStats getStepStats();
  
  /** Get cost graph information */
  CostGraphDef getCostGraph();
  
  /** Get status code */
  int getStatusCode();
  
  /** Get error message if failed */
  String getStatusErrorMessage();
}

Usage Examples:

import org.tensorflow.distruntime.*;
import org.tensorflow.framework.*;

// Create a session on distributed cluster
CreateSessionRequest sessionRequest = CreateSessionRequest.newBuilder()
    .setGraphDef(myGraphDef)
    .setConfig(ConfigProto.newBuilder()
        .setAllowSoftPlacement(true)
        .putDeviceCount("GPU", 2)
        .putDeviceCount("CPU", 4)
        .build())
    .setTarget("grpc://chief:2222")
    .build();

// Run a training step
RunStepRequest stepRequest = RunStepRequest.newBuilder()
    .setSessionHandle(sessionHandle)
    .putFeed("input:0", inputTensor)
    .putFeed("labels:0", labelTensor)
    .addFetch("loss:0")
    .addFetch("accuracy:0")
    .addTarget("train_op")
    .setOptions(RunOptions.newBuilder()
        .setTraceLevel(RunOptions.TraceLevel.FULL_TRACE)
        .setTimeoutInMs(30000)
        .build())
    .build();

ExtendSessionRequest/Response

Extends an existing session with additional graph nodes.

/**
 * Request to extend session with new graph nodes
 */
class ExtendSessionRequest {
  /** Get session handle */
  String getSessionHandle();
  
  /** Get additional graph definition */
  GraphDef getGraphDef();
  
  /** Get current graph version */
  int getCurrentGraphVersion();
  
  /** Create a new builder */
  static Builder newBuilder();
}

/**
 * Response with updated graph version
 */
class ExtendSessionResponse {
  /** Get new graph version */
  int getNewGraphVersion();
}

ListDevicesRequest/Response

Lists available devices in the cluster.

/**
 * Request to list available devices
 */
class ListDevicesRequest {
  /** Get session handle (optional) */
  String getSessionHandle();
}

/**
 * Response with device information
 */
class ListDevicesResponse {
  /** Get list of local devices */
  List<DeviceAttributes> getLocalDeviceList();
  
  /** Get list of remote devices */
  List<DeviceAttributes> getRemoteDeviceList();
}

Worker Service Messages

Worker services execute graph partitions on individual machines in a distributed setup.

RegisterGraphRequest/Response

Registers a graph partition on a worker node.

/**
 * Request to register graph partition on worker
 */
class RegisterGraphRequest {
  /** Get session handle */
  String getSessionHandle();
  
  /** Check if only a worker session should be created (without registering a graph) */
  boolean getCreateWorkerSessionOnly();
  
  /** Get graph definition */
  GraphDef getGraphDef();
  
  /** Check if the graph contains control-flow dependencies */
  boolean getHasControlDependencies();
  
  /** Get graph options */
  GraphOptions getGraphOptions();
  
  /** Get debug options */
  DebugOptions getDebugOptions();
  
  /** Get collective graph key */
  long getCollectiveGraphKey();
  
  /** Create a new builder */
  static Builder newBuilder();
}

/**
 * Response after registering graph
 */
class RegisterGraphResponse {
  /** Get graph handle for future operations */
  String getGraphHandle();
}

RunGraphRequest/Response

Executes a registered graph partition.

/**
 * Request to run registered graph partition
 */
class RunGraphRequest {
  /** Get session handle */
  String getSessionHandle();
  
  /** Get graph handle */
  String getGraphHandle();
  
  /** Get step ID for coordination */
  long getStepId();
  
  /** Get execution count */
  long getExecCount();
  
  /** Get input tensors */
  List<NamedTensorProto> getSendList();
  
  /** Get output tensor names */
  List<String> getRecvKeyList();
  
  /** Check if this is a partial run */
  boolean getIsPartial();
  
  /** Check if this is the last partial run */
  boolean getIsLastPartialRun();
  
  /** Create a new builder */
  static Builder newBuilder();
}

/**
 * Response with computation results
 */
class RunGraphResponse {
  /** Get output tensors */
  List<NamedTensorProto> getRecvList();
  
  /** Get step execution statistics */
  StepStats getStepStats();
  
  /** Get cost graph */
  CostGraphDef getCostGraph();
  
  /** Get partition graphs executed */
  List<GraphDef> getPartitionGraphList();
}

Usage Examples:

import org.tensorflow.distruntime.*;

// Register a graph partition on worker
RegisterGraphRequest registerRequest = RegisterGraphRequest.newBuilder()
    .setSessionHandle(sessionHandle)
    .setGraphDef(partitionedGraph)
    .setHasControlDependencies(true)
    .setGraphOptions(GraphOptions.newBuilder()
        .setEnableRecvScheduling(true)
        .build())
    .build();

// Execute the registered graph
RunGraphRequest runRequest = RunGraphRequest.newBuilder()
    .setSessionHandle(sessionHandle)
    .setGraphHandle(graphHandle)
    .setStepId(currentStepId)
    .addSend(NamedTensorProto.newBuilder()
        .setName("input_partition:0")
        .setTensor(inputTensor)
        .build())
    .addRecvKey("output_partition:0")
    .setIsPartial(false)
    .build();

Eager Service Messages

Services for TensorFlow's eager execution mode, allowing operations to be executed immediately.

CreateContextRequest/Response

Creates an eager execution context.

/**
 * Request to create eager execution context
 */
class CreateContextRequest {
  /** Get server definition */
  ServerDef getServerDef();
  
  /** Check if async execution is enabled */
  boolean getAsync();
  
  /** Get keep alive interval in seconds */
  int getKeepAliveSecs();
  
  /** Get version compatibility requirements */
  VersionDef getVersionDef();
  
  /** Get cluster device filters */
  ClusterDeviceFilters getClusterDeviceFilters();
  
  /** Create a new builder */
  static Builder newBuilder();
}

/**
 * Response with context information
 */
class CreateContextResponse {
  /** Get context ID */
  long getContextId();
  
  /** Get context view ID */
  long getContextViewId();
  
  /** Get device attributes */
  List<DeviceAttributes> getDeviceAttributesList();
}

EnqueueRequest/Response

Enqueues operations for eager execution.

/**
 * Request to enqueue eager operations
 */
class EnqueueRequest {
  /** Get context ID */
  long getContextId();
  
  /** Get list of operations to execute */
  List<Operation> getQueueList();
  
  /** Operation definition for eager execution */
  static class Operation {
    /** Get operation ID */
    long getId();
    
    /** Get operation name */
    String getName();
    
    /** Get operation attributes */
    Map<String, AttrValue> getAttrsMap();
    
    /** Get input handles */
    List<RemoteTensorHandle> getInputsList();
    
    /** Get control input operation IDs */
    List<Long> getControlOpIdsList();
    
    /** Get device name */
    String getDevice();
    
    /** Check if operation is a function */
    boolean getIsFunction();
  }
}

/**
 * Response with operation results
 */
class EnqueueResponse {
  /** Get list of operation results */
  List<QueueResponse> getQueueResponseList();
  
  /** Response for individual operations */
  static class QueueResponse {
    /** Get output tensor handles */
    List<TensorHandle> getTensorList();
    
    /** Get output shapes */
    List<TensorShapeProto> getShapeList();
  }
}

Common Distributed Types

RunOptions

Options for controlling step execution behavior.

/**
 * Options for controlling step execution behavior
 */
class RunOptions {
  /** Get trace level for profiling */
  TraceLevel getTraceLevel();
  
  /** Get timeout in milliseconds */
  long getTimeoutInMs();
  
  /** Get inter-op thread pool setting */
  int getInterOpThreadPool();
  
  /** Check if output partition graphs is enabled */
  boolean getOutputPartitionGraphs();
  
  /** Get debug options */
  DebugOptions getDebugOptions();
  
  /** Check if tensor allocations should be reported when an out-of-memory (OOM) error occurs */
  boolean getReportTensorAllocationsUponOom();
  
  /** Get experimental options */
  Experimental getExperimental();
  
  static Builder newBuilder();
  
  static class Builder {
    Builder setTraceLevel(TraceLevel level);
    Builder setTimeoutInMs(long timeout);
    Builder setInterOpThreadPool(int pool);
    Builder setOutputPartitionGraphs(boolean output);
    Builder setDebugOptions(DebugOptions options);
    RunOptions build();
  }
  
  enum TraceLevel {
    NO_TRACE,
    SOFTWARE_TRACE,
    HARDWARE_TRACE,
    FULL_TRACE
  }
  
  static class Experimental {
    int getCollectiveGraphKey();
    boolean getUseRunHandler();
  }
}

RunMetadata

Metadata returned from step execution.

/**
 * Metadata returned from step execution
 */
class RunMetadata {
  /** Get step execution statistics */
  StepStats getStepStats();
  
  /** Get cost graph information */
  CostGraphDef getCostGraph();
  
  /** Get partition graphs that were executed */
  List<GraphDef> getPartitionGraphsList();
  
  /** Get function graphs that were executed */
  List<GraphDef> getFunctionGraphsList();
  
  static Builder newBuilder();
}

CostGraphDef

Cost model information for operations.

/**
 * Cost model information for operations
 */
class CostGraphDef {
  /** Get cost information for each node */
  List<Node> getNodeList();
  
  /** Cost information for a single node */
  static class Node {
    /** Get node name */
    String getName();
    
    /** Get device name */
    String getDevice();
    
    /** Get node ID */
    int getId();
    
    /** Get input information */
    List<InputInfo> getInputInfoList();
    
    /** Get output information */
    List<OutputInfo> getOutputInfoList();
    
    /** Get temporary memory used */
    long getTempMemorySize();
    
    /** Get persistent memory used */
    long getPersistentMemorySize();
    
    /** Get compute cost */
    long getComputeCost();
    
    /** Get compute time */
    long getComputeTime();
    
    /** Get memory time */
    long getMemoryTime();
    
    /** Check if this is the final node */
    boolean getIsFinal();
    
    /** Get control input nodes */
    List<Integer> getControlInputList();
    
    /** Check if the cost estimates for this node are known to be inaccurate */
    boolean getInaccurate();
  }
  
  /** Input information for cost calculation */
  static class InputInfo {
    int getPrecedingNode();
    int getPrecedingPort();
  }
  
  /** Output information for cost calculation */
  static class OutputInfo {
    long getSize();
    long getAliasInputPort();
    TensorShapeProto getShape();
    DataType getDtype();
  }
}

ClusterDef

Defines the cluster topology and job configurations.

/**
 * Cluster topology definition
 */
class ClusterDef {
  /** Get job definitions */
  Map<String, JobDef> getJobMap();
  
  /** Job definition within cluster */
  static class JobDef {
    /** Get job name */
    String getName();
    
    /** Get task index to address mapping */
    Map<Integer, String> getTasksMap();
  }
}

ServerDef

Defines server configuration for distributed execution.

/**
 * Server configuration for distributed execution
 */
class ServerDef {
  /** Get cluster definition */
  ClusterDef getCluster();
  
  /** Get job name for this server */
  String getJobName();
  
  /** Get task index for this server */
  int getTaskIndex();
  
  /** Get default session configuration */
  ConfigProto getDefaultSessionConfig();
  
  /** Get server protocol (e.g., "grpc") */
  String getProtocol();
  
  /** Get server port */
  int getPort();
}

Usage Examples:

import org.tensorflow.distruntime.*;

// Define a cluster with chief and workers
ClusterDef cluster = ClusterDef.newBuilder()
    .putJob("chief", JobDef.newBuilder()
        .setName("chief")
        .putTasks(0, "chief:2222")
        .build())
    .putJob("worker", JobDef.newBuilder()
        .setName("worker")
        .putTasks(0, "worker0:2222")
        .putTasks(1, "worker1:2222")
        .putTasks(2, "worker2:2222")
        .build())
    .putJob("ps", JobDef.newBuilder()
        .setName("ps")
        .putTasks(0, "ps0:2222")
        .putTasks(1, "ps1:2222")
        .build())
    .build();

// Configure server as worker
ServerDef serverDef = ServerDef.newBuilder()
    .setCluster(cluster)
    .setJobName("worker")
    .setTaskIndex(0)
    .setProtocol("grpc")
    .setPort(2222)
    .setDefaultSessionConfig(ConfigProto.newBuilder()
        .setAllowSoftPlacement(true)
        .build())
    .build();

Install with Tessl CLI

npx tessl i tessl/maven-org-tensorflow--proto

docs

config-persistence.md

core-framework.md

data-examples.md

distributed-runtime.md

index.md

utilities.md

tile.json