Apache Spark Common Utils library providing core utilities and shared functionality for distributed computing operations
npx @tessl/cli install tessl/maven-org-apache-spark--spark-common-utils_2-12@3.5.6
# Apache Spark Common Utils

Apache Spark Common Utils is a core utility library providing essential infrastructure and shared functionality for Apache Spark's distributed computing ecosystem. It includes exception handling, storage management, Java functional interfaces, logging infrastructure, and various utility classes for collections, files, serialization, and schema operations.

## Package Information

- **Package Name**: org.apache.spark:spark-common-utils_2.12
- **Package Type**: maven
- **Language**: Scala/Java
- **Version**: 3.5.6
- **Installation**: Add to Maven dependencies:

```xml
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-common-utils_2.12</artifactId>
  <version>3.5.6</version>
</dependency>
```

## Core Imports

For Scala applications:

```scala
import org.apache.spark.{SparkException, SparkThrowable}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.internal.Logging
```

For Java applications:

```java
import org.apache.spark.SparkException;
import org.apache.spark.SparkThrowable;
import org.apache.spark.api.java.function.*;
import org.apache.spark.storage.StorageLevel;
```

## Basic Usage

```scala
import org.apache.spark.{SparkException, SparkThrowable}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.internal.Logging

// Exception handling with error classes
try {
  // Some Spark operation
} catch {
  case e: SparkException =>
    println(s"Spark error: ${e.getErrorClass}")
    println(s"Parameters: ${e.getMessageParameters}")
}

// Storage level configuration
val storageLevel = StorageLevel.MEMORY_AND_DISK_SER
println(s"Storage config: ${storageLevel.description}")

// Logging in Spark components
class MySparkComponent extends Logging {
  def processData(): Unit = {
    logInfo("Starting data processing")
    // Processing logic here
    logDebug("Data processing completed")
  }
}
```

## Architecture

Apache Spark Common Utils is organized around several key architectural components:

- **Exception System**: Standardized error handling with `SparkException` and the `SparkThrowable` interface providing error classes, message parameters, and query context
- **Storage Management**: `StorageLevel` class controlling RDD persistence strategies across memory, disk, and off-heap storage
- **Function Interfaces**: Java functional interfaces enabling lambda expressions and method references in Spark operations
- **Logging Infrastructure**: `Logging` trait providing SLF4J-based logging with lazy evaluation and level checking
- **Utility Ecosystem**: Helper classes for JSON processing, class loading, collections, serialization, file operations, and schema validation

## Capabilities

### Exception Handling

Comprehensive exception handling system with standardized error classes and contextual information for debugging distributed operations.

```scala { .api }
class SparkException(
  message: String,
  cause: Throwable,
  errorClass: Option[String],
  messageParameters: Map[String, String],
  context: Array[QueryContext]
) extends Exception with SparkThrowable

object SparkException {
  def internalError(msg: String, context: Array[QueryContext], summary: String): SparkException
  def internalError(msg: String, context: Array[QueryContext], summary: String, category: Option[String]): SparkException
  def internalError(msg: String): SparkException
  def internalError(msg: String, category: String): SparkException
  def internalError(msg: String, cause: Throwable): SparkException
}
```

```java { .api }
interface SparkThrowable {
  String getErrorClass();
  default String getSqlState() { return SparkThrowableHelper.getSqlState(this.getErrorClass()); }
  default boolean isInternalError() { return SparkThrowableHelper.isInternalError(this.getErrorClass()); }
  default java.util.Map<String, String> getMessageParameters() { return new java.util.HashMap<>(); }
  default QueryContext[] getQueryContext() { return new QueryContext[0]; }
}
```

[Exception Handling](./exception-handling.md)

### Storage Management

RDD storage level management for controlling persistence strategies across different storage tiers with replication options.

```scala { .api }
class StorageLevel(
  useDisk: Boolean,
  useMemory: Boolean,
  useOffHeap: Boolean,
  deserialized: Boolean,
  replication: Int = 1
) extends Externalizable {
  def useDisk: Boolean
  def useMemory: Boolean
  def useOffHeap: Boolean
  def deserialized: Boolean
  def replication: Int
  def isValid: Boolean
  def toInt: Int
  def description: String
  def clone(): StorageLevel
  private[spark] def memoryMode: MemoryMode
}

object StorageLevel {
  val NONE: StorageLevel
  val DISK_ONLY: StorageLevel
  val DISK_ONLY_2: StorageLevel
  val DISK_ONLY_3: StorageLevel
  val MEMORY_ONLY: StorageLevel
  val MEMORY_ONLY_2: StorageLevel
  val MEMORY_ONLY_SER: StorageLevel
  val MEMORY_ONLY_SER_2: StorageLevel
  val MEMORY_AND_DISK: StorageLevel
  val MEMORY_AND_DISK_2: StorageLevel
  val MEMORY_AND_DISK_SER: StorageLevel
  val MEMORY_AND_DISK_SER_2: StorageLevel
  val OFF_HEAP: StorageLevel

  def apply(useDisk: Boolean, useMemory: Boolean, useOffHeap: Boolean, deserialized: Boolean, replication: Int): StorageLevel
  def apply(useDisk: Boolean, useMemory: Boolean, deserialized: Boolean, replication: Int = 1): StorageLevel
  def apply(flags: Int, replication: Int): StorageLevel
  def fromString(s: String): StorageLevel
}
```

[Storage Management](./storage-management.md)

### Java Functional Interfaces

Comprehensive set of functional interfaces for Java integration with Spark's functional programming model, enabling lambda expressions and method references.

```java { .api }
@FunctionalInterface
interface Function<T1, R> extends Serializable {
  R call(T1 v1) throws Exception;
}

@FunctionalInterface
interface MapFunction<T, U> extends Serializable {
  U call(T value) throws Exception;
}

@FunctionalInterface
interface FilterFunction<T> extends Serializable {
  boolean call(T value) throws Exception;
}
```

[Java Functional Interfaces](./java-functions.md)

### Logging Infrastructure

SLF4J-based logging infrastructure with lazy evaluation, level checking, and Spark-specific configuration management.

```scala { .api }
trait Logging {
  protected def logInfo(msg: => String): Unit
  protected def logDebug(msg: => String): Unit
  protected def logWarning(msg: => String): Unit
  protected def logError(msg: => String): Unit
  protected def logError(msg: => String, throwable: Throwable): Unit
  protected def isTraceEnabled(): Boolean
}
```

[Logging Infrastructure](./logging.md)

### Build and Version Information

Build metadata and version information for runtime introspection and compatibility checking.

```scala { .api }
private[spark] object SparkBuildInfo {
  val spark_version: String
  val spark_branch: String
  val spark_revision: String
  val spark_build_date: String
  val spark_build_user: String
  val spark_repo_url: String
  val spark_doc_root: String
}
```

[Build Information](./build-info.md)

### Utility Classes

Collection of utility classes for common operations including JSON processing, class loading, collections manipulation, and network utilities.

```scala { .api }
private[spark] object JsonUtils extends JsonUtils {
  def toJsonString(block: JsonGenerator => Unit): String
}

private[spark] object SparkClassUtils extends SparkClassUtils {
  def getSparkClassLoader: ClassLoader
  def classForName[C](className: String, initialize: Boolean): Class[C]
}
```

[Utility Classes](./utilities.md)

## Types

### Core Exception Types

```java { .api }
interface QueryContext {
  String objectType();
  String objectName();
  int startIndex();
  int stopIndex();
  String fragment();
}
```

```scala { .api }
class SparkDriverExecutionException(cause: Throwable) extends SparkException
class SparkUserAppException(exitCode: Int) extends SparkException
class ExecutorDeadException(message: String) extends SparkException
class SparkUpgradeException(message: String, cause: Option[Throwable]) extends RuntimeException with SparkThrowable
class SparkArithmeticException(message: String) extends ArithmeticException with SparkThrowable
class SparkUnsupportedOperationException(message: String) extends UnsupportedOperationException with SparkThrowable
class SparkClassNotFoundException(errorClass: String, messageParameters: Map[String, String], cause: Throwable) extends ClassNotFoundException with SparkThrowable
class SparkConcurrentModificationException(errorClass: String, messageParameters: Map[String, String], cause: Throwable) extends ConcurrentModificationException with SparkThrowable
class SparkDateTimeException(message: String) extends DateTimeException with SparkThrowable
class SparkFileNotFoundException(errorClass: String, messageParameters: Map[String, String]) extends FileNotFoundException with SparkThrowable
class SparkNumberFormatException(message: String) extends NumberFormatException with SparkThrowable
class SparkIllegalArgumentException(message: String, cause: Option[Throwable]) extends IllegalArgumentException with SparkThrowable
class SparkRuntimeException(message: String, cause: Option[Throwable]) extends RuntimeException with SparkThrowable
class SparkNoSuchElementException(errorClass: String, messageParameters: Map[String, String]) extends NoSuchElementException with SparkThrowable
class SparkSecurityException(errorClass: String, messageParameters: Map[String, String]) extends SecurityException with SparkThrowable
class SparkArrayIndexOutOfBoundsException(message: String) extends ArrayIndexOutOfBoundsException with SparkThrowable
class SparkSQLException(errorClass: String, messageParameters: Map[String, String]) extends SQLException with SparkThrowable
class SparkSQLFeatureNotSupportedException(errorClass: String, messageParameters: Map[String, String]) extends SQLFeatureNotSupportedException with SparkThrowable
```

### Memory and Network Types

```java { .api }
enum MemoryMode {
  ON_HEAP,
  OFF_HEAP
}

enum ByteUnit {
  BYTE(1),
  KiB(1L << 10),
  MiB(1L << 20),
  GiB(1L << 30),
  TiB(1L << 40),
  PiB(1L << 50);

  long convertFrom(long d, ByteUnit u);
  long convertTo(long d, ByteUnit u);
  long toBytes(long d);
  long toKiB(long d);
  long toMiB(long d);
  long toGiB(long d);
  long toTiB(long d);
  long toPiB(long d);
}
```