or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

application-master.md · index.md · resource-management.md · scheduler-backends.md · utilities.md · yarn-client.md

docs/yarn-client.md

# YARN Client Management

Core client functionality for submitting and managing Spark applications on YARN clusters. Provides comprehensive application lifecycle management, resource negotiation, and monitoring capabilities.

## Capabilities

### Client Class

Main YARN client implementation for application submission and management. Available in both stable and deprecated alpha API versions.

```scala { .api }
/**
 * YARN client implementation for stable API (Hadoop 2.2+)
 * Handles application submission, monitoring, and resource management
 */
class Client(
  args: ClientArguments,
  hadoopConf: Configuration,
  sparkConf: SparkConf
) extends ClientBase {
  /**
   * Stop the YARN client and clean up resources
   */
  def stop(): Unit
}

/**
 * Alternative constructors for simplified client creation
 */
object Client {
  def apply(clientArgs: ClientArguments, spConf: SparkConf): Client
  def apply(clientArgs: ClientArguments): Client
}
```

**Usage Examples:**

```scala
import org.apache.spark.deploy.yarn.{Client, ClientArguments}
import org.apache.spark.SparkConf
import org.apache.hadoop.conf.Configuration

// Full constructor with explicit Hadoop configuration
val sparkConf = new SparkConf().setAppName("MyApp")
val hadoopConf = new Configuration()
val args = Array("--jar", "myapp.jar", "--class", "MyMainClass")
val clientArgs = new ClientArguments(args, sparkConf)
val client = new Client(clientArgs, hadoopConf, sparkConf)

// Simplified constructor
val client2 = new Client(clientArgs, sparkConf)

// Stop client when done
client.stop()
```

### ClientBase Trait

Base trait providing core YARN client functionality shared across API versions.

```scala { .api }
/**
 * Base trait for YARN client functionality
 * Provides core application submission logic and resource management
 */
private[spark] trait ClientBase {
  // Application submission and monitoring capabilities
  // Resource allocation and management
  // YARN application lifecycle management
}

/**
 * Companion object with shared client utilities
 */
private[spark] object ClientBase {
  // Shared client utility methods and constants
}
```

### ClientArguments

Configuration and argument parsing for YARN client operations. Handles all command-line arguments and configuration options for application submission.

```scala { .api }
/**
 * Client configuration and argument parsing for YARN operations
 * Parses command-line arguments and manages application submission parameters
 */
private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf) {
  /** Additional JARs to distribute with the application */
  var addJars: String = null

  /** Files to distribute to executor working directories */
  var files: String = null

  /** Archives to distribute and extract on executors */
  var archives: String = null

  /** User application JAR file */
  var userJar: String = null

  /** User application main class */
  var userClass: String = null

  /** Arguments to pass to user application */
  var userArgs: Seq[String] = Seq[String]()

  /** Executor memory in MB (default: 1024) */
  var executorMemory: Int = 1024

  /** Number of cores per executor (default: 1) */
  var executorCores: Int = 1

  /** Total number of executors to request */
  var numExecutors: Int = DEFAULT_NUMBER_EXECUTORS

  /** YARN queue name (default: "default") */
  var amQueue: String = sparkConf.get("spark.yarn.queue", "default")

  /** ApplicationMaster memory in MB (default: 512) */
  var amMemory: Int = 512

  /** Application name (default: "Spark") */
  var appName: String = "Spark"

  /** Application priority (default: 0) */
  var priority: Int = 0

  /** Additional memory overhead for ApplicationMaster container */
  val amMemoryOverhead: Int = sparkConf.getInt("spark.yarn.driver.memoryOverhead",
    math.max((MEMORY_OVERHEAD_FACTOR * amMemory).toInt, MEMORY_OVERHEAD_MIN))

  /** Additional memory overhead for executor containers */
  val executorMemoryOverhead: Int = sparkConf.getInt("spark.yarn.executor.memoryOverhead",
    math.max((MEMORY_OVERHEAD_FACTOR * executorMemory).toInt, MEMORY_OVERHEAD_MIN))
}
```

**Usage Examples:**

```scala
import org.apache.spark.deploy.yarn.ClientArguments
import org.apache.spark.SparkConf

// Basic argument configuration
val sparkConf = new SparkConf()
val args = Array(
  "--jar", "/path/to/myapp.jar",
  "--class", "com.example.MyMainClass",
  "--arg", "appArg1",
  "--arg", "appArg2",
  "--executor-memory", "2g",
  "--executor-cores", "2",
  "--num-executors", "4"
)

val clientArgs = new ClientArguments(args, sparkConf)

// Access parsed arguments
println(s"User JAR: ${clientArgs.userJar}")
println(s"Main class: ${clientArgs.userClass}")
println(s"Executor memory: ${clientArgs.executorMemory} MB")
println(s"Number of executors: ${clientArgs.numExecutors}")
```

### Main Entry Points

Command-line entry points for YARN client operations.

```scala { .api }
/**
 * Main entry point for YARN client operations
 * Typically invoked by spark-submit in YARN mode
 */
object Client {
  def main(args: Array[String]): Unit
}
```

## Configuration Options

### Required Arguments

- `--jar`: Path to user application JAR file
- `--class`: Main class of user application

### Optional Arguments

- `--arg <value>`: Arguments to pass to user application (can be repeated)
- `--executor-memory <memory>`: Memory per executor (e.g., "1g", "512m")
- `--executor-cores <cores>`: CPU cores per executor
- `--num-executors <count>`: Total number of executors
- `--queue <queue>`: YARN queue name
- `--name <name>`: Application name
- `--files <files>`: Comma-separated list of files to distribute
- `--archives <archives>`: Comma-separated list of archives to distribute
- `--addJars <jars>`: Comma-separated list of additional JARs

### Environment Integration

The client integrates with Spark configuration through `SparkConf` and Hadoop configuration through `Configuration` objects, allowing seamless integration with existing Spark and Hadoop setups.

```scala
// Configuration integration example
val sparkConf = new SparkConf()
  .setAppName("MySparkApp")
  .set("spark.executor.memory", "2g")
  .set("spark.yarn.queue", "production")

val hadoopConf = new Configuration()
hadoopConf.set("yarn.nodemanager.aux-services", "mapreduce_shuffle")

val client = new Client(clientArgs, hadoopConf, sparkConf)
```