# Session Management

Core session management and configuration for Spark SQL applications. SparkSession serves as the main entry point providing access to all functionality, replacing the older SQLContext from Spark 1.x.

## Capabilities

### SparkSession Creation

Primary entry point for all Spark SQL functionality. SparkSession provides a unified interface for working with structured data.

```scala { .api }
/**
 * Main entry point for Spark SQL functionality
 */
class SparkSession {
  /** Runtime configuration interface */
  def conf: RuntimeConfig

  /** Catalog interface for metadata operations */
  def catalog: Catalog

  /** UDF registration interface */
  def udf: UDFRegistration

  /** Streaming query manager */
  def streams: StreamingQueryManager

  /** Execute SQL query and return DataFrame */
  def sql(sqlText: String): DataFrame

  /** Interface for reading data in batch mode */
  def read: DataFrameReader

  /** Interface for reading streaming data */
  def readStream: DataStreamReader

  /** Get table as DataFrame */
  def table(tableName: String): DataFrame

  /** Create empty DataFrame */
  def emptyDataFrame: DataFrame

  /** Create empty Dataset */
  def emptyDataset[T: Encoder]: Dataset[T]

  /** Create DataFrame from RDD of Rows */
  def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame

  /** Create DataFrame from a local java.util.List of Rows */
  def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame

  /** Create Dataset from local sequence */
  def createDataset[T: Encoder](data: Seq[T]): Dataset[T]

  /** Create Dataset with a range of numbers (elements are boxed java.lang.Long) */
  def range(end: Long): Dataset[java.lang.Long]
  def range(start: Long, end: Long): Dataset[java.lang.Long]
  def range(start: Long, end: Long, step: Long, numPartitions: Int): Dataset[java.lang.Long]

  /** Stop the SparkSession */
  def stop(): Unit

  /** Spark version */
  def version: String
}
```

### SparkSession Builder

Builder pattern for creating SparkSession instances with custom configuration.

```scala { .api }
/**
 * Builder for SparkSession creation
 */
class Builder {
  /** Set application name */
  def appName(name: String): Builder

  /** Set master URL */
  def master(master: String): Builder

  /** Set configuration option */
  def config(key: String, value: String): Builder
  def config(key: String, value: Long): Builder
  def config(key: String, value: Double): Builder
  def config(key: String, value: Boolean): Builder

  /** Enable Hive support */
  def enableHiveSupport(): Builder

  /** Get or create SparkSession */
  def getOrCreate(): SparkSession
}

object SparkSession {
  /** Create new Builder */
  def builder(): Builder

  /** Get the currently active SparkSession (throws if none is active) */
  def active: SparkSession

  /** Set currently active SparkSession */
  def setActiveSession(session: SparkSession): Unit

  /** Clear active SparkSession */
  def clearActiveSession(): Unit
}
```

**Usage Example:**

```scala
import org.apache.spark.sql.SparkSession

// Create SparkSession with configuration
val spark = SparkSession.builder()
  .appName("My Spark Application")
  .master("local[4]")
  .config("spark.sql.warehouse.dir", "/path/to/warehouse")
  .config("spark.sql.adaptive.enabled", "true")
  .config("spark.sql.adaptive.coalescePartitions.enabled", "true")
  .enableHiveSupport()
  .getOrCreate()

// Use the session
val df = spark.sql("SELECT * FROM my_table")
df.show()

// Clean up
spark.stop()
```

### Runtime Configuration

Interface for managing Spark configuration at runtime.

```scala { .api }
/**
 * Runtime configuration interface for Spark
 */
class RuntimeConfig {
  /** Set configuration value */
  def set(key: String, value: String): Unit
  def set(key: String, value: Boolean): Unit
  def set(key: String, value: Long): Unit

  /** Get configuration value */
  def get(key: String): String
  def get(key: String, default: String): String

  /** Get all configuration values */
  def getAll: Map[String, String]

  /** Remove configuration */
  def unset(key: String): Unit

  /** Check if configuration is modifiable at runtime */
  def isModifiable(key: String): Boolean
}
```

**Usage Example:**

```scala
// Configure at runtime
spark.conf.set("spark.sql.shuffle.partitions", "200")
spark.conf.set("spark.sql.adaptive.enabled", true)

// Read configuration
val shufflePartitions = spark.conf.get("spark.sql.shuffle.partitions")
val isAdaptiveEnabled = spark.conf.get("spark.sql.adaptive.enabled", "false").toBoolean

// View all configuration
val allConfigs = spark.conf.getAll
allConfigs.foreach { case (key, value) => println(s"$key: $value") }
```

### SQLContext (Deprecated)

Legacy entry point maintained for backward compatibility with Spark 1.x applications.

```scala { .api }
/**
 * Legacy entry point for Spark SQL (deprecated in favor of SparkSession)
 */
class SQLContext(sparkContext: SparkContext) {
  /** Underlying SparkSession backing this SQLContext */
  def sparkSession: SparkSession

  /** Execute SQL query */
  def sql(sqlText: String): DataFrame

  /** Read interface */
  def read: DataFrameReader

  /** UDF registration */
  def udf: UDFRegistration

  /** Create DataFrame */
  def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame
}

object SQLContext {
  /** Get the active SQLContext for the given SparkContext, creating one if needed */
  def getOrCreate(sparkContext: SparkContext): SQLContext
}
```

## Common Patterns

### Local Development Setup

```scala
val spark = SparkSession.builder()
  .appName("Local Development")
  .master("local[*]") // Use all available cores
  .config("spark.sql.warehouse.dir", "spark-warehouse")
  .getOrCreate()

import spark.implicits._ // For DataFrame operations
```

### Cluster Deployment Setup

```scala
val spark = SparkSession.builder()
  .appName("Production Application")
  // Master set by spark-submit
  .config("spark.sql.adaptive.enabled", "true")
  .config("spark.sql.adaptive.coalescePartitions.enabled", "true")
  .config("spark.sql.adaptive.skewJoin.enabled", "true")
  .getOrCreate()
```

### Configuration Management

```scala
// Check if running in cluster mode
val isCluster = spark.conf.get("spark.master").startsWith("yarn") ||
  spark.conf.get("spark.master").startsWith("mesos")

// Adjust configuration based on environment
if (isCluster) {
  spark.conf.set("spark.sql.shuffle.partitions", "400")
} else {
  spark.conf.set("spark.sql.shuffle.partitions", "4")
}
```