or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

catalog.md · columns-functions.md · data-io.md · dataset-dataframe.md · index.md · session-management.md · streaming.md · types-encoders.md · udfs.md

docs/session-management.md

0

# Session Management

1

2

Core session management and configuration for Spark SQL applications. SparkSession serves as the main entry point providing access to all functionality, replacing the older SQLContext from Spark 1.x.

3

4

## Capabilities

5

6

### SparkSession Creation

7

8

Primary entry point for all Spark SQL functionality. SparkSession provides a unified interface for working with structured data.

9

10

```scala { .api }

11

/**

12

* Main entry point for Spark SQL functionality

13

*/

14

class SparkSession {

15

/** Runtime configuration interface */

16

def conf: RuntimeConfig

17

18

/** Catalog interface for metadata operations */

19

def catalog: Catalog

20

21

/** UDF registration interface */

22

def udf: UDFRegistration

23

24

/** Streaming query manager */

25

def streams: StreamingQueryManager

26

27

/** Execute SQL query and return DataFrame */

28

def sql(sqlText: String): DataFrame

29

30

/** Interface for reading data in batch mode */

31

def read: DataFrameReader

32

33

/** Interface for reading streaming data */

34

def readStream: DataStreamReader

35

36

/** Get table as DataFrame */

37

def table(tableName: String): DataFrame

38

39

/** Create empty DataFrame */

40

def emptyDataFrame: DataFrame

41

42

/** Create empty Dataset */

43

def emptyDataset[T: Encoder]: Dataset[T]

44

45

/** Create DataFrame from RDD of Rows */

46

def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame

47

48

/** Create DataFrame from a local java.util.List of Rows */

49

def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame

50

51

/** Create Dataset from local sequence */

52

def createDataset[T: Encoder](data: Seq[T]): Dataset[T]

53

54

/** Create DataFrame with range of numbers */

55

def range(end: Long): Dataset[java.lang.Long]

56

def range(start: Long, end: Long): Dataset[java.lang.Long]

57

def range(start: Long, end: Long, step: Long, numPartitions: Int): Dataset[java.lang.Long]

58

59

/** Stop the SparkSession */

60

def stop(): Unit

61

62

/** Spark version */

63

def version: String

64

}

65

```

66

67

### SparkSession Builder

68

69

Builder pattern for creating SparkSession instances with custom configuration.

70

71

```scala { .api }

72

/**

73

* Builder for SparkSession creation

74

*/

75

class Builder {

76

/** Set application name */

77

def appName(name: String): Builder

78

79

/** Set master URL */

80

def master(master: String): Builder

81

82

/** Set configuration option */

83

def config(key: String, value: String): Builder

84

def config(key: String, value: Long): Builder

85

def config(key: String, value: Double): Builder

86

def config(key: String, value: Boolean): Builder

87

88

/** Enable Hive support */

89

def enableHiveSupport(): Builder

90

91

/** Get or create SparkSession */

92

def getOrCreate(): SparkSession

93

}

94

95

object SparkSession {

96

/** Create new Builder */

97

def builder(): Builder

98

99

/** Get currently active SparkSession */

100

def active: SparkSession

101

102

/** Set currently active SparkSession */

103

def setActiveSession(session: SparkSession): Unit

104

105

/** Clear active SparkSession */

106

def clearActiveSession(): Unit

107

}

108

```

109

110

**Usage Example:**

111

112

```scala

113

import org.apache.spark.sql.SparkSession

114

115

// Create SparkSession with configuration

116

val spark = SparkSession.builder()

117

.appName("My Spark Application")

118

.master("local[4]")

119

.config("spark.sql.warehouse.dir", "/path/to/warehouse")

120

.config("spark.sql.adaptive.enabled", "true")

121

.config("spark.sql.adaptive.coalescePartitions.enabled", "true")

122

.enableHiveSupport()

123

.getOrCreate()

124

125

// Use the session

126

val df = spark.sql("SELECT * FROM my_table")

127

df.show()

128

129

// Clean up

130

spark.stop()

131

```

132

133

### Runtime Configuration

134

135

Interface for managing Spark configuration at runtime.

136

137

```scala { .api }

138

/**

139

* Runtime configuration interface for Spark

140

*/

141

class RuntimeConfig {

142

/** Set configuration value */

143

def set(key: String, value: String): Unit

144

def set(key: String, value: Boolean): Unit

145

def set(key: String, value: Long): Unit

146

147

/** Get configuration value */

148

def get(key: String): String

149

def get(key: String, defaultValue: String): String

150

151

/** Get all configuration values */

152

def getAll: Map[String, String]

153

154

/** Remove configuration */

155

def unset(key: String): Unit

156

157

/** Check if configuration is modifiable */

158

def isModifiable(key: String): Boolean

159

}

160

```

161

162

**Usage Example:**

163

164

```scala

165

// Configure at runtime

166

spark.conf.set("spark.sql.shuffle.partitions", "200")

167

spark.conf.set("spark.sql.adaptive.enabled", true)

168

169

// Read configuration

170

val shufflePartitions = spark.conf.get("spark.sql.shuffle.partitions")

171

val isAdaptiveEnabled = spark.conf.get("spark.sql.adaptive.enabled", "false").toBoolean

172

173

// View all configuration

174

val allConfigs = spark.conf.getAll

175

allConfigs.foreach { case (key, value) => println(s"$key: $value") }

176

```

177

178

### SQLContext (Deprecated)

179

180

Legacy entry point maintained for backward compatibility with Spark 1.x applications.

181

182

```scala { .api }

183

/**

184

* Legacy entry point for Spark SQL (deprecated in favor of SparkSession)

185

*/

186

class SQLContext(sparkContext: SparkContext) {

187

/** Get or create SparkSession */

188

def sparkSession: SparkSession

189

190

/** Execute SQL query */

191

def sql(sqlText: String): DataFrame

192

193

/** Read interface */

194

def read: DataFrameReader

195

196

/** UDF registration */

197

def udf: UDFRegistration

198

199

/** Create DataFrame */

200

def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame

201

}

202

203

object SQLContext {

204

/** Get active SQLContext */

205

def getOrCreate(sparkContext: SparkContext): SQLContext

206

}

207

```

208

209

## Common Patterns

210

211

### Local Development Setup

212

213

```scala

214

val spark = SparkSession.builder()

215

.appName("Local Development")

216

.master("local[*]") // Use all available cores

217

.config("spark.sql.warehouse.dir", "spark-warehouse")

218

.getOrCreate()

219

220

import spark.implicits._ // For DataFrame operations

221

```

222

223

### Cluster Deployment Setup

224

225

```scala

226

val spark = SparkSession.builder()

227

.appName("Production Application")

228

// Master set by spark-submit

229

.config("spark.sql.adaptive.enabled", "true")

230

.config("spark.sql.adaptive.coalescePartitions.enabled", "true")

231

.config("spark.sql.adaptive.skewJoin.enabled", "true")

232

.getOrCreate()

233

```

234

235

### Configuration Management

236

237

```scala

238

// Check if running in cluster mode

239

val isCluster = spark.conf.get("spark.master").startsWith("yarn") ||

240

spark.conf.get("spark.master").startsWith("mesos")

241

242

// Adjust configuration based on environment

243

if (isCluster) {

244

spark.conf.set("spark.sql.shuffle.partitions", "400")

245

} else {

246

spark.conf.set("spark.sql.shuffle.partitions", "4")

247

}

248

```