Interactive Scala Shell for Apache Spark distributed computing
```
npx @tessl/cli install tessl/maven-org-apache-spark--spark-repl_2-11@2.4.0
```

Apache Spark REPL is an interactive Scala shell designed for Apache Spark distributed computing. It provides a command-line interface for interactively executing Spark code, exploring datasets, and prototyping distributed data processing workflows in real time. The REPL extends the standard Scala interpreter with Spark-specific functionality, automatically providing access to the SparkContext and SparkSession objects.
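Inside the shell, the pre-bound `spark` (SparkSession) and `sc` (SparkContext) values are immediately usable. A representative session (output elided):

```scala
scala> val ds = spark.range(1, 1000)      // `spark` is pre-bound by the REPL
scala> ds.filter(_ % 2 == 0).count()
scala> sc.parallelize(1 to 100).sum()     // `sc` is the underlying SparkContext
```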
```scala
import org.apache.spark.repl.Main
import org.apache.spark.repl.SparkILoop
import org.apache.spark.repl.ExecutorClassLoader
```

```scala
// Command-line usage: launches the interactive shell
org.apache.spark.repl.Main.main(Array.empty)
```
```scala
// Programmatic usage with custom configuration
import org.apache.spark.repl.SparkILoop
import scala.tools.nsc.Settings
val settings = new Settings()
settings.usejavacp.value = true  // resolve Spark classes from the JVM classpath
val interp = new SparkILoop()
interp.process(settings)
```

```scala
import org.apache.spark.repl.SparkILoop
// Run code and capture output
val output = SparkILoop.run("val x = 1 + 1; println(x)")
// Run multiple lines of code
val lines = List(
"val data = spark.range(1000)",
"val squares = data.map(x => x * x)",
"squares.count()"
)
val result = SparkILoop.run(lines)
```

The Spark REPL consists of several key components:
- The `Main` object provides the application entry point and SparkSession management
- `SparkILoop` extends Scala's standard REPL with Spark-specific initialization and features
- `ExecutorClassLoader` enables distribution of REPL-generated classes to cluster executors
- `Signaling` provides graceful job cancellation on interrupt signals

Core REPL application functionality, including entry points, SparkSession management, and initialization.
```scala
object Main {
  def main(args: Array[String]): Unit
  def createSparkSession(): SparkSession
  private[repl] def doMain(args: Array[String], _interp: SparkILoop): Unit
  var sparkContext: SparkContext
  var sparkSession: SparkSession
  var interp: SparkILoop
  val conf: SparkConf
  val outputDir: File
}
```
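As a sketch of programmatic use (assuming a Spark 2.4 classpath and that `spark.master` is supplied, e.g. via `-Dspark.master=local[*]` or spark-submit), `createSparkSession()` builds the session the shell would otherwise create and also populates `Main.sparkContext`:

```scala
import org.apache.spark.repl.Main

// Sketch: obtain the REPL's SparkSession without starting the interactive loop.
val spark = Main.createSparkSession()
try {
  println(s"Spark ${spark.version}")
  println(Main.sparkContext.parallelize(1 to 10).sum())  // 55.0
} finally {
  spark.stop()
}
```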
Interactive shell implementation with Spark-specific features and command processing.

```scala
class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter) extends ILoop {
  def this()
  def this(in0: BufferedReader, out: JPrintWriter)
  def initializeSpark(): Unit
  def printWelcome(): Unit
  def resetCommand(line: String): Unit
  def replay(): Unit
  def process(settings: Settings): Boolean
  def createInterpreter(): Unit
  val initializationCommands: Seq[String]
  val commands: List[LoopCommand]
}
object SparkILoop {
  def run(code: String, sets: Settings = new Settings): String
  def run(lines: List[String]): String
}
```
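A sketch of embedding the loop non-interactively: feed it a script through a custom reader and capture what it prints. This assumes Spark is on the classpath and a master is configured (e.g. `-Dspark.master=local[*]`), since the loop creates a SparkSession during initialization; `:quit` terminates the loop.

```scala
import java.io.{BufferedReader, StringReader, StringWriter}
import scala.tools.nsc.Settings
import scala.tools.nsc.interpreter.JPrintWriter
import org.apache.spark.repl.SparkILoop

// Feed a scripted session to the loop and capture everything it prints.
val script = "val n = 1 + 1\n:quit\n"
val reader = new BufferedReader(new StringReader(script))
val output = new StringWriter()
val loop = new SparkILoop(reader, new JPrintWriter(output, true))

val settings = new Settings()
settings.usejavacp.value = true  // resolve classes from the JVM classpath
loop.process(settings)           // runs the script until :quit

println(output.toString)
```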
Class loading infrastructure for distributing REPL-generated classes to cluster executors.

```scala
class ExecutorClassLoader(
    conf: SparkConf,
    env: SparkEnv,
    classUri: String,
    parent: ClassLoader,
    userClassPathFirst: Boolean
) extends ClassLoader {
  def findClass(name: String): Class[_]
  def findClassLocally(name: String): Option[Class[_]]
  def readAndTransformClass(name: String, in: InputStream): Array[Byte]
  def urlEncode(str: String): String
  val uri: URI
  val directory: String
  val parentLoader: ParentClassLoader
}
```
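On executors, Spark installs this loader itself so that classes compiled from REPL input lines can be fetched from the driver. A minimal construction sketch, assuming a running application in which the driver has set the internal `spark.repl.class.uri` property; the class name loaded below is hypothetical (REPL-generated classes follow a `$lineN.$read` naming scheme):

```scala
import org.apache.spark.SparkEnv
import org.apache.spark.repl.ExecutorClassLoader

// Sketch only: in a real deployment, the executor wires this up automatically.
val env = SparkEnv.get                             // assumes a running Spark app
val conf = env.conf
val classUri = conf.get("spark.repl.class.uri")    // where REPL class bytes are served
val loader = new ExecutorClassLoader(
  conf, env, classUri,
  getClass.getClassLoader,  // parent: fall back to the normal classpath
  false)                    // userClassPathFirst

// Hypothetical REPL-generated class name:
val cls = loader.loadClass("$line3.$read")
```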
Signal handling utilities for graceful job cancellation and interrupt management.

```scala
object Signaling {
  def cancelOnInterrupt(): Unit
}
```
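The shell installs this handler at startup so that Ctrl-C cancels running jobs instead of killing the process. An embedded REPL can make the same call once a SparkContext is active (a sketch; note that in some Spark versions this object may be `private[repl]`):

```scala
import org.apache.spark.repl.Signaling

// Install a SIGINT handler: Ctrl-C cancels all active jobs on the current
// SparkContext rather than terminating the JVM.
Signaling.cancelOnInterrupt()
```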
Scala 2.11-specific interpreter components that provide enhanced import handling and expression typing capabilities.

```scala
class SparkILoopInterpreter(settings: Settings, out: JPrintWriter) extends IMain {
  def chooseHandler(member: Tree): MemberHandler
  class SparkImportHandler(imp: Import) extends ImportHandler {
    def targetType: Type
  }
}

trait SparkExprTyper extends ExprTyper {
  def doInterpret(code: String): IR.Result
}
```
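The interpreter is normally created for you by `SparkILoop.createInterpreter()`, but it can also be driven directly through the inherited `IMain` API. A sketch, assuming the usual classpath settings:

```scala
import scala.tools.nsc.Settings
import scala.tools.nsc.interpreter.JPrintWriter
import org.apache.spark.repl.SparkILoopInterpreter

val settings = new Settings()
settings.usejavacp.value = true  // resolve classes from the JVM classpath
val out = new JPrintWriter(Console.out, true)

val intp = new SparkILoopInterpreter(settings, out)
intp.interpret("val x = 41 + 1")  // prints: x: Int = 42
intp.close()
```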
```scala
// Core Spark types used throughout the API
type SparkContext = org.apache.spark.SparkContext
type SparkSession = org.apache.spark.sql.SparkSession
type SparkConf = org.apache.spark.SparkConf
type SparkEnv = org.apache.spark.SparkEnv
// Scala REPL types
type Settings = scala.tools.nsc.Settings
type GenericRunnerSettings = scala.tools.nsc.GenericRunnerSettings
type ILoop = scala.tools.nsc.interpreter.ILoop
type IMain = scala.tools.nsc.interpreter.IMain
type LoopCommand = scala.tools.nsc.interpreter.LoopCommand
type JPrintWriter = scala.tools.nsc.interpreter.JPrintWriter
type MemberHandler = scala.tools.nsc.interpreter.MemberHandler
type ImportHandler = scala.tools.nsc.interpreter.ImportHandler
type ExprTyper = scala.tools.nsc.interpreter.ExprTyper
// Scala compiler types
type Tree = scala.tools.nsc.ast.Trees#Tree
type Import = scala.tools.nsc.ast.Trees#Import
type Type = scala.tools.nsc.Global#Type
type IR = scala.tools.nsc.interpreter.IR
// Java I/O types
type BufferedReader = java.io.BufferedReader
type InputStream = java.io.InputStream
type ByteArrayOutputStream = java.io.ByteArrayOutputStream
type FilterInputStream = java.io.FilterInputStream
type File = java.io.File
type URI = java.net.URI
type URL = java.net.URL
// Class loading types
type ClassLoader = java.lang.ClassLoader
type ParentClassLoader = org.apache.spark.util.ParentClassLoader
type ClassVisitor = org.apache.xbean.asm6.ClassVisitor
type ClassWriter = org.apache.xbean.asm6.ClassWriter
type ClassReader = org.apache.xbean.asm6.ClassReader
type MethodVisitor = org.apache.xbean.asm6.MethodVisitor
// Hadoop FileSystem types
type FileSystem = org.apache.hadoop.fs.FileSystem
type Path = org.apache.hadoop.fs.Path
```