# Exception Handling

Comprehensive exception system for Apache Spark with error classes, structured messaging, and query context support.

## Core Exception Classes

### SparkException

Main exception class for Spark operations with support for error classes and query context.

```scala { .api }
import org.apache.spark.SparkException

class SparkException(
  message: String,
  cause: Throwable,
  errorClass: Option[String],
  messageParameters: Map[String, String],
  context: Array[QueryContext] = Array.empty
) extends Exception(message, cause) with SparkThrowable
```

#### Alternative Constructors

```scala { .api }
// Basic message and cause
def this(message: String, cause: Throwable)

// Message only
def this(message: String)

// With error class and parameters
def this(errorClass: String, messageParameters: Map[String, String], cause: Throwable)

// With full context
def this(
  errorClass: String,
  messageParameters: Map[String, String],
  cause: Throwable,
  context: Array[QueryContext]
)
```

#### Methods

```scala { .api }
// Get message parameters as Java Map
def getMessageParameters(): java.util.Map[String, String]

// Get error class/condition
def getCondition(): String

// Get query context information
def getQueryContext(): Array[QueryContext]
```

#### Usage Examples

```scala { .api }
import org.apache.spark.SparkException

// Basic exception
val basicEx = new SparkException("Operation failed")

// Exception with cause
val exWithCause = new SparkException("Parse error", parseException)

// Structured exception with error class
val structuredEx = new SparkException(
  "INVALID_INPUT_TYPE",
  Map(
    "inputType" -> "String",
    "expectedType" -> "Integer",
    "columnName" -> "age"
  ),
  null
)

// Access error information
println(structuredEx.getCondition()) // "INVALID_INPUT_TYPE"
println(structuredEx.getMessageParameters()) // Java Map with parameters
```

### SparkException Companion Object

Static utility methods for exception creation and validation.

```scala { .api }
import org.apache.spark.SparkException

object SparkException {
  // Create internal error exceptions
  def internalError(msg: String): SparkException
  def internalError(msg: String, cause: Throwable): SparkException

  // Validation utility with structured errors
  def require(
    requirement: Boolean,
    errorClass: String,
    messageParameters: => Map[String, String]
  ): Unit

  // Parameter conversion utility
  def constructMessageParams(
    messageParameters: java.util.Map[String, String]
  ): Map[String, String]
}
```

#### Usage Examples

```scala { .api }
// Validation with structured error
SparkException.require(
  value >= 0,
  "NEGATIVE_VALUE_ERROR",
  Map("value" -> value.toString, "field" -> "count")
)

// Create internal error
val internalError = SparkException.internalError(
  "Unexpected null value in critical path",
  nullPointerException
)

// Parameter conversion
val javaParams: java.util.Map[String, String] = // ... from Java API
val scalaParams = SparkException.constructMessageParams(javaParams)
```

## Error Class Management

### ErrorClassesJsonReader

Developer API for loading and managing error class definitions from JSON files.

```scala { .api }
import org.apache.spark.ErrorClassesJsonReader

class ErrorClassesJsonReader(jsonFileURLs: Seq[URL])
```

#### Methods

```scala { .api }
// Get formatted error message with parameters
def getErrorMessage(
  errorClass: String,
  messageParameters: Map[String, Any]
): String

// Get parameter names for error class
def getMessageParameters(errorClass: String): Seq[String]

// Get raw message template
def getMessageTemplate(errorClass: String): String

// Get SQL state for error class
def getSqlState(errorClass: String): String

// Validate error class existence
def isValidErrorClass(errorClass: String): Boolean
```

#### Usage Examples

```scala { .api }
import java.net.URL

// Create reader with error definition files
val urls = Seq(new URL("file:///path/to/error-classes.json"))
val reader = new ErrorClassesJsonReader(urls)

// Get formatted error message
val message = reader.getErrorMessage(
  "INVALID_INPUT_TYPE",
  Map("inputType" -> "String", "expectedType" -> "Integer")
)

// Validate error class
if (reader.isValidErrorClass("CUSTOM_ERROR_CLASS")) {
  val template = reader.getMessageTemplate("CUSTOM_ERROR_CLASS")
  val sqlState = reader.getSqlState("CUSTOM_ERROR_CLASS")
}

// Get parameter requirements
val requiredParams = reader.getMessageParameters("MISSING_COLUMN_ERROR")
```

## Specialized Exception Types

These exception types are marked `private[spark]` but are part of Spark's internal exception hierarchy:

### Runtime Exceptions
- `SparkDriverExecutionException` - Driver execution failures
- `SparkUserAppException` - User application exit codes
- `ExecutorDeadException` - Dead executor exceptions
- `SparkRuntimeException` - General runtime exceptions

### Data Type Exceptions
- `SparkArithmeticException` - Arithmetic operation exceptions
- `SparkDateTimeException` - DateTime operation exceptions
- `SparkNumberFormatException` - Number parsing exceptions

### System Exceptions
- `SparkClassNotFoundException` - Class loading exceptions
- `SparkFileNotFoundException` - File operation exceptions
- `SparkSecurityException` - Security related exceptions
- `SparkConcurrentModificationException` - Concurrency exceptions

### Validation Exceptions
- `SparkIllegalArgumentException` - Illegal argument exceptions
- `SparkIllegalStateException` - Illegal state exceptions
- `SparkUnsupportedOperationException` - Unsupported operation exceptions
- `SparkNoSuchElementException` - Missing element exceptions

### Integration Exceptions
- `SparkPythonException` - Python interop exceptions
- `SparkSQLException` - SQL operation exceptions
- `SparkSQLFeatureNotSupportedException` - SQL feature support exceptions
- `SparkUpgradeException` - Version upgrade related exceptions

### Array Exceptions
- `SparkArrayIndexOutOfBoundsException` - Array bounds exceptions

## Best Practices

### Error Class Design
1. Use descriptive error class names following UPPER_SNAKE_CASE convention
2. Include relevant parameters for debugging and user guidance
3. Design reusable error classes that can apply to multiple scenarios
4. Provide meaningful message templates with parameter placeholders

### Exception Creation
```scala { .api }
// Good: Structured exception with context
throw new SparkException(
  "COLUMN_TYPE_MISMATCH",
  Map(
    "columnName" -> column.name,
    "actualType" -> actualType.toString,
    "expectedType" -> expectedType.toString
  ),
  cause
)

// Avoid: Plain string messages without structure
throw new SparkException("Column type mismatch in " + column.name)
```

### Validation Patterns
```scala { .api }
// Use require for preconditions
SparkException.require(
  input.nonEmpty,
  "EMPTY_INPUT_ERROR",
  Map("inputName" -> inputName)
)

// Chain validations with appropriate error classes
SparkException.require(schema != null, "NULL_SCHEMA_ERROR", Map.empty)
SparkException.require(
  schema.fields.nonEmpty,
  "EMPTY_SCHEMA_ERROR",
  Map("tableName" -> tableName)
)
```

### Error Context
```scala { .api }
// Include query context when available
val context = Array(
  QueryContext(query, lineNumber, columnNumber, fragment)
)

throw new SparkException(
  "SQL_PARSE_ERROR",
  Map("query" -> query, "position" -> s"$lineNumber:$columnNumber"),
  parseException,
  context
)
```

## Integration Points

### With Logging Framework
```scala { .api }
import org.apache.spark.internal.{Logging, LogKeys, MDC}

class DataProcessor extends Logging {
  def processColumn(columnName: String): Unit = {
    try {
      // Processing logic
    } catch {
      case ex: SparkException =>
        logError(log"Processing failed for column ${MDC(LogKeys.COLUMN_NAME, columnName)}: " +
          log"${MDC(LogKeys.ERROR_CLASS, ex.getCondition())}", ex)
        throw ex
    }
  }
}
```

### With Configuration Management
```scala { .api }
// Use exceptions in configuration validation
def validateConfiguration(config: Map[String, String]): Unit = {
  SparkException.require(
    config.contains("spark.app.name"),
    "MISSING_CONFIG_ERROR",
    Map("configKey" -> "spark.app.name")
  )
}
```

## Error Recovery Patterns

### Graceful Degradation
```scala { .api }
def tryProcessWithFallback[T](primary: () => T, fallback: () => T): T = {
  try {
    primary()
  } catch {
    case ex: SparkException if ex.getCondition() == "TRANSIENT_ERROR" =>
      logWarning("Primary processing failed, using fallback", ex)
      fallback()
    case ex: SparkException =>
      throw ex // Re-throw non-recoverable errors
  }
}
```

### Retry with Exponential Backoff
```scala { .api }
def retryOnTransientError[T](maxAttempts: Int)(operation: () => T): T = {
  var attempts = 0
  var lastException: SparkException = null

  while (attempts < maxAttempts) {
    try {
      return operation()
    } catch {
      case ex: SparkException if isTransientError(ex) =>
        lastException = ex
        attempts += 1
        Thread.sleep(1000L * (1L << (attempts - 1))) // Exponential backoff: 1s, 2s, 4s, ...
      case ex: SparkException =>
        throw ex // Non-transient errors fail immediately
    }
  }

  throw lastException
}

def isTransientError(ex: SparkException): Boolean = {
  ex.getCondition() match {
    case "NETWORK_ERROR" | "TEMPORARY_UNAVAILABLE" => true
    case _ => false
  }
}
```