or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

catalog.md · columns-functions.md · data-io.md · dataset-dataframe.md · index.md · session-management.md · streaming.md · types-encoders.md · udfs.md

docs/session-management.md

0

# Session Management

1

2

Core session management and configuration for Spark SQL applications. SparkSession serves as the main entry point providing access to all functionality, replacing the older SQLContext from Spark 1.x.

3

4

## Capabilities

5

6

### SparkSession Creation

7

8

Primary entry point for all Spark SQL functionality. SparkSession provides a unified interface for working with structured data.

9

10

```scala { .api }

11

/**

12

* Main entry point for Spark SQL functionality

13

*/

14

class SparkSession {

15

/** Runtime configuration interface */

16

def conf: RuntimeConfig

17

18

/** Catalog interface for metadata operations */

19

def catalog: Catalog

20

21

/** UDF registration interface */

22

def udf: UDFRegistration

23

24

/** Streaming query manager */

25

def streams: StreamingQueryManager

26

27

/** Execute SQL query and return DataFrame */

28

def sql(sqlText: String): DataFrame

29

30

/** Interface for reading data in batch mode */

31

def read: DataFrameReader

32

33

/** Interface for reading streaming data */

34

def readStream: DataStreamReader

35

36

/** Get table as DataFrame */

37

def table(tableName: String): DataFrame

38

39

/** Create empty DataFrame */

40

def emptyDataFrame: DataFrame

41

42

/** Create empty Dataset */

43

def emptyDataset[T: Encoder]: Dataset[T]

44

45

/** Create DataFrame from RDD of Rows */

46

def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame

47

48

/** Create DataFrame from a local java.util.List of Rows */

49

def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame

50

51

/** Create Dataset from local sequence */

52

def createDataset[T: Encoder](data: Seq[T]): Dataset[T]

53

54

/** Create DataFrame with range of numbers */

55

def range(end: Long): Dataset[java.lang.Long]

56

def range(start: Long, end: Long): Dataset[java.lang.Long]

57

def range(start: Long, end: Long, step: Long, numPartitions: Int): Dataset[java.lang.Long]

58

59

/** Stop the SparkSession */

60

def stop(): Unit

61

62

/** Spark version */

63

def version: String

64

}

65

```

66

67

### SparkSession Builder

68

69

Builder pattern for creating SparkSession instances with custom configuration.

70

71

```scala { .api }

72

/**

73

* Builder for SparkSession creation

74

*/

75

class Builder {

76

/** Set application name */

77

def appName(name: String): Builder

78

79

/** Set master URL */

80

def master(master: String): Builder

81

82

/** Set configuration option */

83

def config(key: String, value: String): Builder

84

def config(key: String, value: Long): Builder

85

def config(key: String, value: Double): Builder

86

def config(key: String, value: Boolean): Builder

87

88

/** Enable Hive support */

89

def enableHiveSupport(): Builder

90

91

/** Get or create SparkSession */

92

def getOrCreate(): SparkSession

93

}

94

95

object SparkSession {

96

/** Create new Builder */

97

def builder(): Builder

98

99

/** Get currently active SparkSession */

100

def active: SparkSession

101

102

/** Set currently active SparkSession */

103

def setActiveSession(session: SparkSession): Unit

104

105

/** Clear active SparkSession */

106

def clearActiveSession(): Unit

107

}

108

```

109

110

**Usage Example:**

111

112

```scala

113

import org.apache.spark.sql.SparkSession

114

115

// Create SparkSession with configuration

116

val spark = SparkSession.builder()

117

.appName("My Spark Application")

118

.master("local[4]")

119

.config("spark.sql.warehouse.dir", "/path/to/warehouse")

120

.config("spark.sql.adaptive.enabled", "true")

121

.config("spark.sql.adaptive.coalescePartitions.enabled", "true")

122

.enableHiveSupport()

123

.getOrCreate()

124

125

// Use the session

126

val df = spark.sql("SELECT * FROM my_table")

127

df.show()

128

129

// Clean up

130

spark.stop()

131

```

132

133

### Runtime Configuration

134

135

Interface for managing Spark configuration at runtime.

136

137

```scala { .api }

138

/**

139

* Runtime configuration interface for Spark

140

*/

141

class RuntimeConfig {

142

/** Set configuration value */

143

def set(key: String, value: String): Unit

144

def set(key: String, value: Boolean): Unit

145

def set(key: String, value: Long): Unit

146

147

/** Get configuration value */

148

def get(key: String): String

149

def get(key: String, defaultValue: String): String

150

151

/** Get all configuration values */

152

def getAll: Map[String, String]

153

154

/** Remove configuration */

155

def unset(key: String): Unit

156

157

/** Check if configuration is modifiable */

158

def isModifiable(key: String): Boolean

159

}

160

```

161

162

**Usage Example:**

163

164

```scala

165

// Configure at runtime

166

spark.conf.set("spark.sql.shuffle.partitions", "200")

167

spark.conf.set("spark.sql.adaptive.enabled", true)

168

169

// Read configuration

170

val shufflePartitions = spark.conf.get("spark.sql.shuffle.partitions")

171

val isAdaptiveEnabled = spark.conf.get("spark.sql.adaptive.enabled", "false").toBoolean

172

173

// View all configuration

174

val allConfigs = spark.conf.getAll

175

allConfigs.foreach { case (key, value) => println(s"$key: $value") }

176

```

177

178

### SQLContext (Deprecated)

179

180

Legacy entry point maintained for backward compatibility with Spark 1.x applications.

181

182

```scala { .api }

183

/**

184

* Legacy entry point for Spark SQL (deprecated in favor of SparkSession)

185

*/

186

class SQLContext(sparkContext: SparkContext) {

187

/** Get or create SparkSession */

188

def sparkSession: SparkSession

189

190

/** Execute SQL query */

191

def sql(sqlText: String): DataFrame

192

193

/** Read interface */

194

def read: DataFrameReader

195

196

/** UDF registration */

197

def udf: UDFRegistration

198

199

/** Create DataFrame */

200

def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame

201

}

202

203

object SQLContext {

204

/** Get active SQLContext */

205

def getOrCreate(sparkContext: SparkContext): SQLContext

206

}

207

```

208

209

## Common Patterns

210

211

### Local Development Setup

212

213

```scala

214

val spark = SparkSession.builder()

215

.appName("Local Development")

216

.master("local[*]") // Use all available cores

217

.config("spark.sql.warehouse.dir", "spark-warehouse")

218

.getOrCreate()

219

220

import spark.implicits._ // For DataFrame operations

221

```

222

223

### Cluster Deployment Setup

224

225

```scala

226

val spark = SparkSession.builder()

227

.appName("Production Application")

228

// Master set by spark-submit

229

.config("spark.sql.adaptive.enabled", "true")

230

.config("spark.sql.adaptive.coalescePartitions.enabled", "true")

231

.config("spark.sql.adaptive.skewJoin.enabled", "true")

232

.getOrCreate()

233

```

234

235

### Configuration Management

236

237

```scala

238

// Check if running in cluster mode

239

val isCluster = spark.conf.get("spark.master").startsWith("yarn") ||

240

spark.conf.get("spark.master").startsWith("mesos")

241

242

// Adjust configuration based on environment

243

if (isCluster) {

244

spark.conf.set("spark.sql.shuffle.partitions", "400")

245

} else {

246

spark.conf.set("spark.sql.shuffle.partitions", "4")

247

}

248

```