# YARN Client Management

Core client functionality for submitting and managing Spark applications on YARN clusters. Provides comprehensive application lifecycle management, resource negotiation, and monitoring capabilities.

## Capabilities

### Client Class

The main YARN client implementation for application submission and management, available in both the stable API and a deprecated alpha API version.

```scala { .api }
/**
 * YARN client implementation for the stable API (Hadoop 2.2+).
 * Handles application submission, monitoring, and resource management.
 */
class Client(
  args: ClientArguments,
  hadoopConf: Configuration,
  sparkConf: SparkConf
) extends ClientBase {
  /**
   * Stop the YARN client and clean up resources.
   */
  def stop(): Unit
}

/**
 * Companion object with factory methods for simplified client creation
 */
object Client {
  def apply(clientArgs: ClientArguments, spConf: SparkConf): Client
  def apply(clientArgs: ClientArguments): Client
}
```

**Usage Examples:**

```scala
import org.apache.spark.deploy.yarn.{Client, ClientArguments}
import org.apache.spark.SparkConf
import org.apache.hadoop.conf.Configuration

// Full constructor with an explicit Hadoop configuration
val sparkConf = new SparkConf().setAppName("MyApp")
val hadoopConf = new Configuration()
val args = Array("--jar", "myapp.jar", "--class", "MyMainClass")
val clientArgs = new ClientArguments(args, sparkConf)
val client = new Client(clientArgs, hadoopConf, sparkConf)

// Simplified creation via the companion object's apply method
val client2 = Client(clientArgs, sparkConf)

// Stop the client when done
client.stop()
```

### ClientBase Trait

Base trait providing core YARN client functionality shared across API versions.

```scala { .api }
/**
 * Base trait for YARN client functionality.
 * Provides core application submission logic and resource management.
 */
private[spark] trait ClientBase {
  // Application submission and monitoring capabilities
  // Resource allocation and management
  // YARN application lifecycle management
}

/**
 * Companion object with shared client utilities
 */
private[spark] object ClientBase {
  // Shared client utility methods and constants
}
```

### ClientArguments

Configuration and argument parsing for YARN client operations. Handles all command-line arguments and configuration options for application submission.

```scala { .api }
/**
 * Client configuration and argument parsing for YARN operations.
 * Parses command-line arguments and manages application submission parameters.
 */
private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf) {
  /** Additional JARs to distribute with the application */
  var addJars: String = null

  /** Files to distribute to executor working directories */
  var files: String = null

  /** Archives to distribute and extract on executors */
  var archives: String = null

  /** User application JAR file */
  var userJar: String = null

  /** User application main class */
  var userClass: String = null

  /** Arguments to pass to the user application */
  var userArgs: Seq[String] = Seq[String]()

  /** Executor memory in MB (default: 1024) */
  var executorMemory: Int = 1024

  /** Number of cores per executor (default: 1) */
  var executorCores: Int = 1

  /** Total number of executors to request */
  var numExecutors: Int = DEFAULT_NUMBER_EXECUTORS

  /** YARN queue name (default: "default") */
  var amQueue: String = sparkConf.get("spark.yarn.queue", "default")

  /** ApplicationMaster memory in MB (default: 512) */
  var amMemory: Int = 512

  /** Application name (default: "Spark") */
  var appName: String = "Spark"

  /** Application priority (default: 0) */
  var priority: Int = 0

  /** Additional memory overhead for the ApplicationMaster container */
  val amMemoryOverhead: Int = sparkConf.getInt("spark.yarn.driver.memoryOverhead",
    math.max((MEMORY_OVERHEAD_FACTOR * amMemory).toInt, MEMORY_OVERHEAD_MIN))

  /** Additional memory overhead for executor containers */
  val executorMemoryOverhead: Int = sparkConf.getInt("spark.yarn.executor.memoryOverhead",
    math.max((MEMORY_OVERHEAD_FACTOR * executorMemory).toInt, MEMORY_OVERHEAD_MIN))
}
```
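
The overhead expressions above pick the larger of a fixed floor and a fraction of the container memory. A minimal, self-contained sketch of that formula (the constant values 0.07 and 384 MB are assumptions matching some Spark 1.x releases, not guaranteed for yours):

```scala
// Assumed values for illustration only; the real constants live in the
// Spark YARN module and vary by release.
val MEMORY_OVERHEAD_FACTOR = 0.07
val MEMORY_OVERHEAD_MIN = 384

// Overhead = max(factor * containerMemoryMb, floor)
def overhead(containerMemoryMb: Int): Int =
  math.max((MEMORY_OVERHEAD_FACTOR * containerMemoryMb).toInt, MEMORY_OVERHEAD_MIN)

overhead(512)  // 384 MB: the floor wins for the default 512 MB AM
overhead(8192) // 573 MB: 7% of an 8192 MB executor container
```

In other words, small containers always get at least the floor, while large containers scale their overhead proportionally.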

**Usage Examples:**

```scala
import org.apache.spark.deploy.yarn.ClientArguments
import org.apache.spark.SparkConf

// Basic argument configuration
val sparkConf = new SparkConf()
val args = Array(
  "--jar", "/path/to/myapp.jar",
  "--class", "com.example.MyMainClass",
  "--arg", "appArg1",
  "--arg", "appArg2",
  "--executor-memory", "2g",
  "--executor-cores", "2",
  "--num-executors", "4"
)

val clientArgs = new ClientArguments(args, sparkConf)

// Access parsed arguments
println(s"User JAR: ${clientArgs.userJar}")
println(s"Main class: ${clientArgs.userClass}")
println(s"Executor memory: ${clientArgs.executorMemory} MB")
println(s"Number of executors: ${clientArgs.numExecutors}")
```

### Main Entry Points

Command-line entry points for YARN client operations.

```scala { .api }
/**
 * Main entry point for YARN client operations.
 * Typically invoked by spark-submit in YARN mode.
 */
object Client {
  def main(args: Array[String]): Unit
}
```
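
`spark-submit` normally builds the argument array and calls this entry point itself; invoking it directly is rarely needed, but a sketch looks like this (assumes a reachable YARN cluster and a correctly configured `HADOOP_CONF_DIR`):

```scala
import org.apache.spark.deploy.yarn.Client

// Direct invocation of the entry point, for illustration only.
// In practice, prefer: spark-submit --master yarn ...
Client.main(Array(
  "--jar", "/path/to/myapp.jar",
  "--class", "com.example.MyMainClass",
  "--num-executors", "4"
))
```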

## Configuration Options

### Required Arguments

- `--jar`: Path to user application JAR file
- `--class`: Main class of user application

### Optional Arguments

- `--arg <value>`: Arguments to pass to user application (can be repeated)
- `--executor-memory <memory>`: Memory per executor (e.g., "1g", "512m")
- `--executor-cores <cores>`: CPU cores per executor
- `--num-executors <count>`: Total number of executors
- `--queue <queue>`: YARN queue name
- `--name <name>`: Application name
- `--files <files>`: Comma-separated list of files to distribute
- `--archives <archives>`: Comma-separated list of archives to distribute
- `--addJars <jars>`: Comma-separated list of additional JARs

### Environment Integration

The client integrates with Spark configuration through `SparkConf` and Hadoop configuration through `Configuration` objects, allowing seamless integration with existing Spark and Hadoop setups.

```scala
// Configuration integration example
val sparkConf = new SparkConf()
  .setAppName("MySparkApp")
  .set("spark.executor.memory", "2g")
  .set("spark.yarn.queue", "production")

val hadoopConf = new Configuration()
hadoopConf.set("yarn.nodemanager.aux-services", "mapreduce_shuffle")

val client = new Client(clientArgs, hadoopConf, sparkConf)
```