Distributed SQL query engine for Apache Spark with DataFrame/Dataset APIs, comprehensive SQL functions, streaming capabilities, and data source connectors
npx @tessl/cli install tessl/maven-org-apache-spark--spark-sql-2-13@4.0.0

Apache Spark SQL is a distributed SQL query engine built on Apache Spark's core engine. It lets users execute SQL queries, work with DataFrames and Datasets, and perform complex transformations on large-scale distributed data. It provides a comprehensive SQL interface with ANSI SQL compliance, advanced query optimization through the Catalyst optimizer, and seamless integration with data sources including JSON, Parquet, Delta Lake, and JDBC databases.
org.apache.spark:spark-sql_2.13:4.0.1

import org.apache.spark.sql.{SparkSession, Dataset, DataFrame, Column, Row}
import org.apache.spark.sql.types._
import org.apache.spark.sql.functions._

For Java:
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import static org.apache.spark.sql.functions.*;

import org.apache.spark.sql.{SparkSession, functions => F}
// Create SparkSession - entry point to Spark SQL
val spark = SparkSession.builder()
.appName("Spark SQL Example")
.master("local[*]")
.getOrCreate()
// Create a DataFrame from JSON data
val df = spark.read.json("path/to/data.json")
// Perform transformations
val result = df
  .select(F.col("name"), F.col("age"), F.col("department"))
.filter(F.col("age") > 18)
.groupBy(F.col("department"))
.agg(F.count("*").as("employee_count"))
// Execute SQL queries directly against a registered view
df.createOrReplaceTempView("employees")
val sqlResult = spark.sql("SELECT department, COUNT(*) FROM employees WHERE age > 18 GROUP BY department")
// Show results
result.show()
// Clean up
spark.stop()

Apache Spark SQL is built around several key components:
Primary entry point for all Spark SQL operations, managing the Spark context and providing unified APIs for both batch and streaming workloads.
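Beyond the quick start above, a brief illustrative sketch of building a configured session and creating DataFrames and Datasets from local data; the Person case class and the configuration value are assumptions made only for this example.

import org.apache.spark.sql.SparkSession

// Hypothetical domain type used only for this sketch
case class Person(name: String, age: Int, department: String)

val spark = SparkSession.builder()
  .appName("SparkSession Example")
  .master("local[*]")
  .config("spark.sql.shuffle.partitions", "8") // illustrative setting
  .getOrCreate()

import spark.implicits._ // Encoders for case classes, $"col" syntax

// DataFrame from a local Seq of case classes (schema inferred by reflection)
val people = spark.createDataFrame(Seq(
  Person("Alice", 34, "Engineering"),
  Person("Bob", 28, "Sales")
))

// Strongly typed Dataset via an implicit Encoder
val morePeople = spark.createDataset(Seq(Person("Carol", 41, "Engineering")))

// Quick synthetic data for testing
val ids = spark.range(0, 1000, 1, 4)

people.show()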
abstract class SparkSession extends Serializable with Closeable {
// Configuration and lifecycle
def version: String
def conf: RuntimeConfig
def sparkContext: SparkContext
def sessionState: SessionState
def sharedState: SharedState
def newSession(): SparkSession
def stop(): Unit
def close(): Unit
// SQL execution
def sql(sqlText: String): DataFrame
def sqlContext: SQLContext
// Data access
def read: DataFrameReader
def readStream: DataStreamReader
def catalog: Catalog
def table(tableName: String): DataFrame
  def range(end: Long): Dataset[java.lang.Long]
  def range(start: Long, end: Long): Dataset[java.lang.Long]
  def range(start: Long, end: Long, step: Long): Dataset[java.lang.Long]
  def range(start: Long, end: Long, step: Long, numPartitions: Int): Dataset[java.lang.Long]
// DataFrame creation
def createDataFrame[A <: Product : TypeTag](data: Seq[A]): DataFrame
def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame
def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame
def createDataset[T : Encoder](data: Seq[T]): Dataset[T]
def createDataset[T : Encoder](data: RDD[T]): Dataset[T]
def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T]
// UDF registration
def udf: UDFRegistration
// Streaming
def streams: StreamingQueryManager
// Experimental and advanced features
def experimental: ExperimentalMethods
def listenerManager: ExecutionListenerManager
  // Utility: measure and print the execution time of a code block
  def time[T](f: => T): T
}
object SparkSession {
def builder(): Builder
def getActiveSession: Option[SparkSession]
def getDefaultSession: Option[SparkSession]
def setActiveSession(session: SparkSession): Unit
def setDefaultSession(session: SparkSession): Unit
def clearActiveSession(): Unit
def clearDefaultSession(): Unit
class Builder {
def appName(name: String): Builder
def master(master: String): Builder
def config(key: String, value: String): Builder
def config(key: String, value: Boolean): Builder
def config(key: String, value: Long): Builder
def config(key: String, value: Double): Builder
def config(conf: SparkConf): Builder
def enableHiveSupport(): Builder
def getOrCreate(): SparkSession
}
}

Strongly-typed and untyped distributed collections with functional programming APIs and SQL-like operations for data transformation and analysis.
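A short sketch mixing untyped (Column-based) and typed operations from the Dataset API listed below; it assumes the `spark` session from the quick start, and the Employee and Dept case classes are illustrative.

import org.apache.spark.sql.Dataset
import org.apache.spark.sql.functions._
import spark.implicits._ // assumes the `spark` session from the quick start

case class Employee(name: String, age: Int, deptId: Int) // illustrative
case class Dept(id: Int, deptName: String)               // illustrative

val employees = Seq(Employee("Alice", 34, 1), Employee("Bob", 28, 2)).toDS()
val depts     = Seq(Dept(1, "Engineering"), Dept(2, "Sales")).toDS()

// Untyped (DataFrame-style) transformations
val adults = employees
  .filter(col("age") > 18)
  .withColumn("ageNextYear", col("age") + 1)

// Typed transformations preserve the element type
val names: Dataset[String] = employees.map(_.name)
val countsByDept = employees.groupByKey(_.deptId).count()

// joinWith keeps both sides as typed objects
val joined: Dataset[(Employee, Dept)] =
  employees.joinWith(depts, employees("deptId") === depts("id"))

joined.show()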
abstract class Dataset[T] extends Serializable {
// Basic transformations
def select(cols: Column*): DataFrame
def select(col: String, cols: String*): DataFrame
def selectExpr(exprs: String*): DataFrame
def filter(condition: Column): Dataset[T]
def filter(conditionExpr: String): Dataset[T]
def where(condition: Column): Dataset[T]
def where(conditionExpr: String): Dataset[T]
// Grouping and aggregation
def groupBy(cols: Column*): RelationalGroupedDataset
def groupBy(col1: String, cols: String*): RelationalGroupedDataset
def groupByKey[K: Encoder](func: T => K): KeyValueGroupedDataset[K, T]
def agg(expr: Column, exprs: Column*): DataFrame
def agg(exprs: Map[String, String]): DataFrame
// Joins
def join(right: Dataset[_]): DataFrame
def join(right: Dataset[_], joinExprs: Column): DataFrame
def join(right: Dataset[_], joinExprs: Column, joinType: String): DataFrame
def join(right: Dataset[_], usingColumn: String): DataFrame
def crossJoin(right: Dataset[_]): DataFrame
def joinWith[U](other: Dataset[U], condition: Column): Dataset[(T, U)]
// Sorting
def sort(sortExprs: Column*): Dataset[T]
def sort(sortCol: String, sortCols: String*): Dataset[T]
def orderBy(sortExprs: Column*): Dataset[T]
def orderBy(sortCol: String, sortCols: String*): Dataset[T]
// Set operations
def union(other: Dataset[T]): Dataset[T]
def unionByName(other: Dataset[T]): Dataset[T]
def intersect(other: Dataset[T]): Dataset[T]
def except(other: Dataset[T]): Dataset[T]
def distinct(): Dataset[T]
def dropDuplicates(): Dataset[T]
def dropDuplicates(colNames: Array[String]): Dataset[T]
// Column operations
def withColumn(colName: String, col: Column): DataFrame
def withColumnRenamed(existingName: String, newName: String): DataFrame
def drop(colName: String): DataFrame
def drop(col: Column): DataFrame
// Typed transformations
def map[U: Encoder](func: T => U): Dataset[U]
def flatMap[U: Encoder](func: T => IterableOnce[U]): Dataset[U]
def mapPartitions[U: Encoder](func: Iterator[T] => Iterator[U]): Dataset[U]
// Sampling and partitioning
def sample(fraction: Double): Dataset[T]
def sample(withReplacement: Boolean, fraction: Double, seed: Long): Dataset[T]
def randomSplit(weights: Array[Double]): Array[Dataset[T]]
def repartition(numPartitions: Int): Dataset[T]
def repartition(partitionExprs: Column*): Dataset[T]
def coalesce(numPartitions: Int): Dataset[T]
def limit(n: Int): Dataset[T]
// Actions
def count(): Long
def collect(): Array[T]
def collectAsList(): java.util.List[T]
def take(n: Int): Array[T]
def first(): T
def head(): T
def head(n: Int): Array[T]
def show(): Unit
def show(numRows: Int): Unit
def show(numRows: Int, truncate: Boolean): Unit
def reduce(func: (T, T) => T): T
def foreach(f: T => Unit): Unit
def foreachPartition(f: Iterator[T] => Unit): Unit
// Schema and metadata
def schema: StructType
def printSchema(): Unit
def dtypes: Array[(String, String)]
def columns: Array[String]
def isLocal: Boolean
def isEmpty: Boolean
def isStreaming: Boolean
// Type conversions and casting
def as[U: Encoder]: Dataset[U]
def toDF(): DataFrame
def toDF(colNames: String*): DataFrame
// Persistence
def cache(): Dataset[T]
def persist(): Dataset[T]
def persist(newLevel: StorageLevel): Dataset[T]
def unpersist(): Dataset[T]
def unpersist(blocking: Boolean): Dataset[T]
// I/O
def write: DataFrameWriter[T]
def writeStream: DataStreamWriter[T]
}
type DataFrame = Dataset[Row]
Comprehensive library of 749+ built-in functions for data manipulation, including aggregate, string, date/time, mathematical, array/map, and window functions.
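The listing below shows only a representative subset of that library. A small sketch combining several of these functions; the orders data and column names are assumptions.

import org.apache.spark.sql.functions._
import spark.implicits._ // assumes the `spark` session from the quick start

val orders = Seq(
  ("A-1", "alice", 120.0, "2024-01-15"),
  ("A-2", "bob",    80.0, "2024-02-03")
).toDF("order_id", "customer", "amount", "order_date")

val enriched = orders
  .withColumn("label", concat(lit("order:"), col("order_id")))
  .withColumn("tier",  when(col("amount") >= 100, "high").otherwise("low"))
  .withColumn("month", date_format(col("order_date"), "yyyy-MM"))

enriched
  .groupBy(col("month"), col("tier"))
  .agg(count("*").as("orders"), sum(col("amount")).as("revenue"))
  .show()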
object functions {
def col(colName: String): Column
def lit(literal: Any): Column
def when(condition: Column, value: Any): Column
def sum(e: Column): Column
def count(e: Column): Column
def concat(exprs: Column*): Column
def date_format(dateExpr: Column, format: String): Column
}

Rich type system supporting primitive types, complex nested structures, and user-defined types with comprehensive schema management capabilities.
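A sketch of declaring an explicit schema and applying it when building a DataFrame from rows; the field names are assumptions, and the type classes (IntegerType, StringType, ArrayType) come from org.apache.spark.sql.types.

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
// assumes the `spark` session from the quick start

val schema = StructType(Array(
  StructField("id",   IntegerType, nullable = false),
  StructField("name", StringType,  nullable = true),
  StructField("tags", ArrayType(StringType), nullable = true)
))

val rows = java.util.Arrays.asList(
  Row(1, "alpha", Seq("a", "b")),
  Row(2, "beta",  Seq("c"))
)

val df = spark.createDataFrame(rows, schema)
df.printSchema()
println(schema.json) // schemas round-trip to and from JSON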
abstract class DataType {
def typeName: String
def json: String
}
case class StructType(fields: Array[StructField]) extends DataType
case class StructField(name: String, dataType: DataType, nullable: Boolean)

Real-time data processing with support for multiple execution modes, stateful operations, watermarking, and exactly-once semantics.
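A minimal streaming sketch reading JSON files from a directory and writing running counts to the console sink; the input path, schema, and trigger interval are illustrative assumptions.

import org.apache.spark.sql.functions._
import org.apache.spark.sql.streaming.Trigger
import org.apache.spark.sql.types._
// assumes the `spark` session from the quick start

val eventSchema = StructType(Array(
  StructField("user",   StringType, nullable = true),
  StructField("action", StringType, nullable = true)
))

val events = spark.readStream
  .schema(eventSchema)  // file-based streaming sources require an explicit schema
  .json("/tmp/events")  // illustrative input directory

val counts = events.groupBy(col("action")).agg(count("*").as("cnt"))

val query = counts.writeStream
  .outputMode("complete") // emit the full recomputed aggregate each trigger
  .format("console")
  .trigger(Trigger.ProcessingTime("10 seconds"))
  .queryName("action_counts")
  .start()

query.awaitTermination()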
class DataStreamReader {
def format(source: String): DataStreamReader
def schema(schema: StructType): DataStreamReader
def schema(schemaString: String): DataStreamReader
def option(key: String, value: String): DataStreamReader
def option(key: String, value: Boolean): DataStreamReader
def option(key: String, value: Long): DataStreamReader
def option(key: String, value: Double): DataStreamReader
def options(options: Map[String, String]): DataStreamReader
def options(options: java.util.Map[String, String]): DataStreamReader
def load(): DataFrame
def load(path: String): DataFrame
def json(path: String): DataFrame
def parquet(path: String): DataFrame
def text(path: String): DataFrame
def csv(path: String): DataFrame
def orc(path: String): DataFrame
def table(tableName: String): DataFrame
}
class DataStreamWriter[T] {
def outputMode(outputMode: OutputMode): DataStreamWriter[T]
def outputMode(outputMode: String): DataStreamWriter[T]
def trigger(trigger: Trigger): DataStreamWriter[T]
def format(source: String): DataStreamWriter[T]
def option(key: String, value: String): DataStreamWriter[T]
def option(key: String, value: Boolean): DataStreamWriter[T]
def option(key: String, value: Long): DataStreamWriter[T]
def option(key: String, value: Double): DataStreamWriter[T]
def options(options: Map[String, String]): DataStreamWriter[T]
def options(options: java.util.Map[String, String]): DataStreamWriter[T]
def partitionBy(colNames: String*): DataStreamWriter[T]
def queryName(queryName: String): DataStreamWriter[T]
def start(): StreamingQuery
def start(path: String): StreamingQuery
def toTable(tableName: String): StreamingQuery
def foreach(writer: ForeachWriter[T]): DataStreamWriter[T]
def foreachBatch(function: (Dataset[T], Long) => Unit): DataStreamWriter[T]
}

Comprehensive data source support for reading from and writing to various formats including Parquet, JSON, CSV, JDBC databases, and cloud storage systems.
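An illustrative sketch of batch reads and writes; the file paths, JDBC URL, and credentials are placeholders, and the JDBC call additionally requires the driver on the classpath.

import java.util.Properties
import org.apache.spark.sql.SaveMode
// assumes the `spark` session from the quick start

// Generic reader API
val csvDf = spark.read
  .format("csv")
  .option("header", "true")
  .option("inferSchema", "true")
  .load("/data/input/people.csv") // placeholder path

// Format-specific shortcut
val parquetDf = spark.read.parquet("/data/input/people.parquet")

// JDBC read (placeholder URL and credentials)
val props = new Properties()
props.setProperty("user", "reporting")
props.setProperty("password", "secret")
val jdbcDf = spark.read.jdbc("jdbc:postgresql://db:5432/hr", "employees", props)

// Write back out, partitioned by a column (assumes a `country` column exists)
csvDf.write
  .mode(SaveMode.Overwrite)
  .partitionBy("country")
  .parquet("/data/output/people")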
class DataFrameReader {
def format(source: String): DataFrameReader
def schema(schema: StructType): DataFrameReader
def option(key: String, value: String): DataFrameReader
def load(): DataFrame
def json(path: String): DataFrame
def parquet(path: String): DataFrame
def jdbc(url: String, table: String, properties: Properties): DataFrame
}

Metadata management for databases, tables, views, and functions with comprehensive catalog inspection and manipulation capabilities.
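A short sketch of catalog inspection, assuming the `spark` session from the quick start and a temp view registered for the purpose of the example.

// assumes the `spark` session from the quick start
val numbers = spark.range(0, 10).toDF("id")
numbers.createOrReplaceTempView("numbers")

println(spark.catalog.currentDatabase)

// Databases, tables, and functions are returned as Datasets
spark.catalog.listDatabases().show()
spark.catalog.listTables().show()

// Cache by name, then read back through the session
spark.catalog.cacheTable("numbers")
spark.table("numbers").show()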
abstract class Catalog {
def currentDatabase: String
def listDatabases(): Dataset[Database]
def listTables(): Dataset[Table]
def listFunctions(): Dataset[Function]
def cacheTable(tableName: String): Unit
}

Support for registering custom functions in Scala, Java, Python, and R with type-safe interfaces and SQL integration.
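A sketch of registering a Scala function for use in SQL text, plus the equivalent inline UDF built with functions.udf; the function name and logic are illustrative.

import org.apache.spark.sql.functions.{col, udf}
import spark.implicits._ // assumes the `spark` session from the quick start

val df = Seq(("alice", 34), ("bob", 28)).toDF("name", "age")

// Register for SQL use
spark.udf.register("shout", (s: String) => s.toUpperCase + "!")
df.createOrReplaceTempView("people")
spark.sql("SELECT shout(name) AS loud_name FROM people").show()

// Inline UDF for the DataFrame API
val shout = udf((s: String) => s.toUpperCase + "!")
df.select(shout(col("name")).as("loud_name")).show()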
abstract class UDFRegistration {
def register[RT: TypeTag](name: String, func: Function0[RT]): UserDefinedFunction
def register[RT: TypeTag, A1: TypeTag](name: String, func: Function1[A1, RT]): UserDefinedFunction
}
case class UserDefinedFunction(f: AnyRef, dataType: DataType, inputTypes: Option[Seq[DataType]])

trait Row extends Serializable {
def get(i: Int): Any
def getString(i: Int): String
def getInt(i: Int): Int
def getLong(i: Int): Long
def getDouble(i: Int): Double
def getBoolean(i: Int): Boolean
def schema: StructType
}
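A small sketch of collecting rows to the driver and reading fields positionally; positions follow the schema order shown by printSchema().

import org.apache.spark.sql.Row
import spark.implicits._ // assumes the `spark` session from the quick start

val collected: Array[Row] = Seq(("alice", 34), ("bob", 28))
  .toDF("name", "age")
  .collect()

collected.foreach { row =>
  val name = row.getString(0) // field 0: name
  val age  = row.getInt(1)    // field 1: age
  println(s"$name is $age")
}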
class Column(val node: ColumnNode) {
// Type conversion and encoding
def as[U: Encoder]: TypedColumn[Any, U]
def as(alias: String): Column
def as(aliases: Seq[String]): Column
def as(aliases: Array[String]): Column
def as(alias: Symbol): Column
def as(alias: String, metadata: Metadata): Column
def cast(to: DataType): Column
def cast(to: String): Column
def try_cast(to: DataType): Column
def try_cast(to: String): Column
// Naming and aliasing
def name(alias: String): Column
def alias(alias: String): Column
// Arithmetic operations
def unary_- : Column
def +(other: Any): Column
def plus(other: Any): Column
def -(other: Any): Column
def minus(other: Any): Column
def *(other: Any): Column
def multiply(other: Any): Column
def /(other: Any): Column
def divide(other: Any): Column
def %(other: Any): Column
def mod(other: Any): Column
// Comparison operations
def ===(other: Any): Column
def equalTo(other: Any): Column
def =!=(other: Any): Column
def notEqual(other: Any): Column
def >(other: Any): Column
def gt(other: Any): Column
def <(other: Any): Column
def lt(other: Any): Column
def <=(other: Any): Column
def leq(other: Any): Column
def >=(other: Any): Column
def geq(other: Any): Column
def <=>(other: Any): Column
def eqNullSafe(other: Any): Column
def between(lowerBound: Any, upperBound: Any): Column
// Logical operations
def unary_! : Column
def ||(other: Any): Column
def or(other: Column): Column
def &&(other: Any): Column
def and(other: Column): Column
// Bitwise operations
def bitwiseOR(other: Any): Column
def bitwiseAND(other: Any): Column
def bitwiseXOR(other: Any): Column
// Null/NaN testing
def isNaN: Column
def isNull: Column
def isNotNull: Column
// String operations
def like(literal: String): Column
def rlike(literal: String): Column
def ilike(literal: String): Column
def contains(other: Any): Column
def startsWith(other: Column): Column
def startsWith(literal: String): Column
def endsWith(other: Column): Column
def endsWith(literal: String): Column
def substr(startPos: Column, len: Column): Column
def substr(startPos: Int, len: Int): Column
// Collection and structure operations
def apply(extraction: Any): Column
def getItem(key: Any): Column
def getField(fieldName: String): Column
def withField(fieldName: String, col: Column): Column
def dropFields(fieldNames: String*): Column
def isin(list: Any*): Column
def isInCollection(values: scala.collection.Iterable[_]): Column
def isInCollection(values: java.lang.Iterable[_]): Column
// Conditional logic
def when(condition: Column, value: Any): Column
def otherwise(value: Any): Column
// Sorting operations
def desc: Column
def desc_nulls_first: Column
def desc_nulls_last: Column
def asc: Column
def asc_nulls_first: Column
def asc_nulls_last: Column
// Window operations
def over(window: expressions.WindowSpec): Column
def over(): Column
}
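An illustrative sketch of composing Column expressions: casting, boolean logic, conditional logic, and membership tests; the input data and column names are assumptions.

import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.IntegerType
import spark.implicits._ // assumes the `spark` session from the quick start

val raw = Seq(
  ("alice", "34", Some("Engineering")),
  ("bob",   "28", None)
).toDF("name", "age_str", "department")

val shaped = raw
  .withColumn("age", col("age_str").cast(IntegerType))
  .withColumn("is_adult", col("age").isNotNull && col("age") >= 18)
  .withColumn("dept", when(col("department").isNull, lit("unknown")).otherwise(col("department")))
  .filter(col("name").startsWith("a") || col("dept").isin("Sales", "unknown"))
  .select(col("name").as("employee"), col("age"), col("is_adult"), col("dept"))

shaped.show()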
class RelationalGroupedDataset(
df: DataFrame,
groupingExprs: Seq[Expression],
groupType: RelationalGroupedDataset.GroupType
) {
def agg(expr: Column, exprs: Column*): DataFrame
def count(): DataFrame
def mean(colNames: String*): DataFrame
def max(colNames: String*): DataFrame
def min(colNames: String*): DataFrame
def sum(colNames: String*): DataFrame
def pivot(pivotColumn: String): RelationalGroupedDataset
def pivot(pivotColumn: String, values: Seq[Any]): RelationalGroupedDataset
}
class KeyValueGroupedDataset[K, V](
kEncoder: Encoder[K],
vEncoder: Encoder[V]
) {
def agg[U1](col1: TypedColumn[V, U1]): Dataset[(K, U1)]
def count(): Dataset[(K, Long)]
def cogroup[U](other: KeyValueGroupedDataset[K, U]): Dataset[(K, (Iterable[V], Iterable[U]))]
def flatMapGroups[U: Encoder](f: (K, Iterator[V]) => Iterator[U]): Dataset[U]
def mapGroups[U: Encoder](f: (K, Iterator[V]) => U): Dataset[(K, U)]
def reduceGroups(f: (V, V) => V): Dataset[(K, V)]
}
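A sketch contrasting the two grouping flavors: an untyped pivot through RelationalGroupedDataset and a typed reduction through KeyValueGroupedDataset; the Sale case class and data are illustrative.

import org.apache.spark.sql.functions._
import spark.implicits._ // assumes the `spark` session from the quick start

case class Sale(region: String, quarter: String, amount: Double) // illustrative

val sales = Seq(
  Sale("EMEA", "Q1", 100.0), Sale("EMEA", "Q2", 150.0),
  Sale("APAC", "Q1",  80.0), Sale("APAC", "Q2", 120.0)
).toDS()

// Untyped: pivot quarters into columns
sales.groupBy(col("region"))
  .pivot("quarter", Seq("Q1", "Q2"))
  .sum("amount")
  .show()

// Typed: per-key reduction keeps the Sale type
sales.groupByKey(_.region)
  .reduceGroups((a, b) => a.copy(amount = a.amount + b.amount))
  .show()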
abstract class Expression extends Serializable {
def dataType: DataType
def nullable: Boolean
def eval(input: InternalRow): Any
def sql: String
}
abstract class OutputMode {
def name: String
}
object OutputMode {
val Append: OutputMode
val Complete: OutputMode
val Update: OutputMode
}
abstract class Trigger {
def name: String
}
object Trigger {
def ProcessingTime(interval: String): Trigger
def ProcessingTime(interval: Duration): Trigger
def Once(): Trigger
def Continuous(interval: String): Trigger
def Continuous(interval: Duration): Trigger
def AvailableNow(): Trigger
}
abstract class StreamingQuery {
def id: UUID
def name: String
def isActive: Boolean
def awaitTermination(): Unit
def awaitTermination(timeoutMs: Long): Boolean
def stop(): Unit
def processAllAvailable(): Unit
def lastProgress: StreamingQueryProgress
def recentProgress: Array[StreamingQueryProgress]
def status: StreamingQueryStatus
def exception: Option[StreamingQueryException]
}
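A brief sketch of monitoring a running query, assuming a query handle like the one returned by start() in the streaming sketch earlier.

// assumes `query: StreamingQuery`, e.g. from the streaming sketch above
println(s"id=${query.id}, active=${query.isActive}")
println(query.status) // current trigger status

// Most recent progress report, once at least one batch has completed
Option(query.lastProgress).foreach { p =>
  val inputRows = p.sources.map(_.numInputRows).sum
  println(s"batch ${p.batchId}: $inputRows input rows")
}

// Wait up to 30 seconds for termination, then stop the query
if (!query.awaitTermination(30000L)) {
  query.stop()
}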
case class Database(
name: String,
catalog: Option[String],
description: Option[String],
locationUri: String
)
case class Table(
name: String,
catalog: Option[String],
namespace: Array[String],
description: Option[String],
tableType: String,
isTemporary: Boolean
)
case class Function(
name: String,
catalog: Option[String],
namespace: Array[String],
description: Option[String],
className: String,
isTemporary: Boolean
)
abstract class Encoder[T] extends Serializable {
def schema: StructType
def clsTag: ClassTag[T]
}
class TypedColumn[-T, U](
node: ColumnNode,
encoder: Encoder[U]
) extends Column {
def name(alias: String): TypedColumn[T, U]
}
abstract class StorageLevel extends Serializable {
def useDisk: Boolean
def useMemory: Boolean
def useOffHeap: Boolean
def deserialized: Boolean
def replication: Int
}
object StorageLevel {
val NONE: StorageLevel
val DISK_ONLY: StorageLevel
val DISK_ONLY_2: StorageLevel
val MEMORY_ONLY: StorageLevel
val MEMORY_ONLY_2: StorageLevel
val MEMORY_ONLY_SER: StorageLevel
val MEMORY_ONLY_SER_2: StorageLevel
val MEMORY_AND_DISK: StorageLevel
val MEMORY_AND_DISK_2: StorageLevel
val MEMORY_AND_DISK_SER: StorageLevel
val MEMORY_AND_DISK_SER_2: StorageLevel
val OFF_HEAP: StorageLevel
}
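A short caching sketch with an explicit storage level; the level chosen here is just an illustrative default for data that may not fit in memory.

import org.apache.spark.storage.StorageLevel
// assumes the `spark` session from the quick start

val big = spark.range(0, 1000000).toDF("id")

// Keep the data in memory, spilling to disk if needed
big.persist(StorageLevel.MEMORY_AND_DISK)

println(big.count()) // first action materializes the cache
println(big.count()) // subsequent actions reuse it

big.unpersist()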
class DataFrameWriter[T] {
def mode(saveMode: SaveMode): DataFrameWriter[T]
def mode(saveMode: String): DataFrameWriter[T]
def format(source: String): DataFrameWriter[T]
def option(key: String, value: String): DataFrameWriter[T]
def options(options: Map[String, String]): DataFrameWriter[T]
def partitionBy(colNames: String*): DataFrameWriter[T]
def bucketBy(numBuckets: Int, colName: String, colNames: String*): DataFrameWriter[T]
def sortBy(colName: String, colNames: String*): DataFrameWriter[T]
def save(): Unit
def save(path: String): Unit
def insertInto(tableName: String): Unit
def saveAsTable(name: String): Unit
def json(path: String): Unit
def parquet(path: String): Unit
def orc(path: String): Unit
def text(path: String): Unit
def csv(path: String): Unit
def jdbc(url: String, table: String, connectionProperties: Properties): Unit
}
abstract class SaveMode {
def name(): String
}
object SaveMode {
val Append: SaveMode
val Overwrite: SaveMode
val ErrorIfExists: SaveMode
val Ignore: SaveMode
}
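A sketch of writing to managed tables with bucketing and different save modes; the table and column names are placeholders, and bucketBy/sortBy apply only together with saveAsTable.

import org.apache.spark.sql.SaveMode
import spark.implicits._ // assumes the `spark` session from the quick start

val events = Seq((1L, "click"), (2L, "view")).toDF("user_id", "event")

// Bucketed, sorted managed table
events.write
  .mode(SaveMode.Overwrite)
  .bucketBy(8, "user_id")
  .sortBy("user_id")
  .saveAsTable("events_bucketed")

// Plain append to a separate, non-bucketed table
events.write.mode("append").saveAsTable("events_raw")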
abstract class InternalRow extends Serializable {
def get(ordinal: Int, dataType: DataType): Any
def isNullAt(ordinal: Int): Boolean
def getBoolean(ordinal: Int): Boolean
def getByte(ordinal: Int): Byte
def getShort(ordinal: Int): Short
def getInt(ordinal: Int): Int
def getLong(ordinal: Int): Long
def getFloat(ordinal: Int): Float
def getDouble(ordinal: Int): Double
}
case class StreamingQueryProgress(
id: UUID,
runId: UUID,
name: String,
timestamp: String,
batchId: Long,
batchDuration: Long,
durationMs: Map[String, Long],
eventTime: Map[String, String],
stateOperators: Seq[StateOperatorProgress],
sources: Seq[SourceProgress],
sink: SinkProgress,
observedMetrics: Map[String, Row]
)
case class StreamingQueryStatus(
message: String,
isDataAvailable: Boolean,
isTriggerActive: Boolean
)
case class StreamingQueryException(
message: String,
cause: Option[String],
startOffset: Option[OffsetSeq],
endOffset: Option[OffsetSeq]
) extends Exception(message)
case class StateOperatorProgress(
operatorName: String,
numRowsTotal: Long,
numRowsUpdated: Long,
allUpdatesTimeMs: Long,
numRowsRemoved: Long,
allRemovalsTimeMs: Long,
commitTimeMs: Long,
memoryUsedBytes: Long,
numRowsDroppedByWatermark: Long,
numShufflePartitions: Long,
numStateStoreInstances: Long,
customMetrics: Map[String, Long]
)
case class SourceProgress(
description: String,
startOffset: Option[String],
endOffset: Option[String],
latestOffset: Option[String],
numInputRows: Long,
inputRowsPerSecond: Double,
processedRowsPerSecond: Double,
metrics: Map[String, String]
)
case class SinkProgress(
description: String,
numOutputRows: Option[Long],
metrics: Map[String, String]
)
abstract class OffsetSeq extends Serializable {
def toStreamProgress(sources: Seq[Source]): StreamingQueryProgress
}
abstract class WindowSpec {
def partitionBy(cols: Column*): WindowSpec
def partitionBy(colNames: String*): WindowSpec
def orderBy(cols: Column*): WindowSpec
def orderBy(colNames: String*): WindowSpec
def rowsBetween(start: Long, end: Long): WindowSpec
def rangeBetween(start: Long, end: Long): WindowSpec
}
object Window {
def partitionBy(cols: Column*): WindowSpec
def partitionBy(colNames: String*): WindowSpec
def orderBy(cols: Column*): WindowSpec
def orderBy(colNames: String*): WindowSpec
val unboundedPreceding: Long
val unboundedFollowing: Long
val currentRow: Long
}
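A sketch of a window aggregation built with the Window object and WindowSpec above: a running total per partition; the data and column names are assumptions.

import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
import spark.implicits._ // assumes the `spark` session from the quick start

val monthly = Seq(
  ("EMEA", "2024-01", 100.0),
  ("EMEA", "2024-02", 150.0),
  ("APAC", "2024-01",  80.0)
).toDF("region", "month", "amount")

// Running total per region, ordered by month
val w = Window
  .partitionBy(col("region"))
  .orderBy(col("month"))
  .rowsBetween(Window.unboundedPreceding, Window.currentRow)

monthly.withColumn("running_total", sum(col("amount")).over(w)).show()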
abstract class Metadata extends Serializable {
def json: String
def contains(key: String): Boolean
def getLong(key: String): Long
def getDouble(key: String): Double
def getBoolean(key: String): Boolean
def getString(key: String): String
def getMetadata(key: String): Metadata
}
object Metadata {
val empty: Metadata
def fromJson(json: String): Metadata
}
abstract class ColumnNode extends Serializable {
def sql: String
def normalized: ColumnNode
}
abstract class Source extends Serializable {
def schema: StructType
}
abstract class Properties extends java.util.Properties
abstract class ForeachWriter[T] extends Serializable {
def open(partitionId: Long, epochId: Long): Boolean
def process(value: T): Unit
def close(errorOrNull: Throwable): Unit
}
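A sketch of a custom foreach sink that prints each record; a real writer would acquire a connection in open and release it in close. The rate source is a built-in test source used here for illustration.

import org.apache.spark.sql.{ForeachWriter, Row}
// assumes the `spark` session from the quick start

class ConsoleRowWriter extends ForeachWriter[Row] {
  override def open(partitionId: Long, epochId: Long): Boolean = true // process every partition
  override def process(value: Row): Unit = println(value)
  override def close(errorOrNull: Throwable): Unit = ()               // nothing to release
}

val rateQuery = spark.readStream
  .format("rate")                // emits (timestamp, value) rows for testing
  .option("rowsPerSecond", "1")
  .load()
  .writeStream
  .foreach(new ConsoleRowWriter)
  .start()

rateQuery.awaitTermination(10000L)
rateQuery.stop()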
abstract class ClassTag[T] extends Serializable
abstract class RuntimeConfig extends Serializable {
def get(key: String): String
def get(key: String, defaultValue: String): String
def getOption(key: String): Option[String]
  def set(key: String, value: String): Unit
  def set(key: String, value: Boolean): Unit
  def set(key: String, value: Long): Unit
  def set(key: String, value: Double): Unit
  def unset(key: String): Unit
def isModifiable(key: String): Boolean
}
abstract class SparkContext extends Serializable {
def version: String
def applicationId: String
def applicationAttemptId: Option[String]
def master: String
def appName: String
def jars: Seq[String]
def startTime: Long
def defaultParallelism: Int
def defaultMinPartitions: Int
def hadoopConfiguration: Configuration
def stop(): Unit
}
abstract class SparkConf extends Serializable {
def set(key: String, value: String): SparkConf
def setAppName(name: String): SparkConf
def setMaster(master: String): SparkConf
def get(key: String): String
def get(key: String, defaultValue: String): String
def getOption(key: String): Option[String]
def getAll: Array[(String, String)]
def contains(key: String): Boolean
def remove(key: String): SparkConf
}
abstract class SessionState extends Serializable {
def catalog: SessionCatalog
def conf: SQLConf
def experimentalMethods: ExperimentalMethods
def functionRegistry: FunctionRegistry
def udf: UDFRegistration
def analyzer: Analyzer
def optimizer: Optimizer
def planner: SparkPlanner
def streamingQueryManager: StreamingQueryManager
}
abstract class SharedState extends Serializable {
def sparkContext: SparkContext
def cacheManager: CacheManager
def listener: SQLListener
def externalCatalog: ExternalCatalog
def globalTempViewManager: GlobalTempViewManager
def streamingQueryManager: StreamingQueryManager
}
abstract class SQLContext extends Serializable {
def sparkSession: SparkSession
def sparkContext: SparkContext
def sql(sqlText: String): DataFrame
def table(tableName: String): DataFrame
def tableNames(): Array[String]
def tableNames(databaseName: String): Array[String]
def tables(): DataFrame
def tables(databaseName: String): DataFrame
}
abstract class StreamingQueryManager extends Serializable {
def active: Array[StreamingQuery]
def get(id: String): StreamingQuery
def get(id: UUID): StreamingQuery
def resetTerminated(): Unit
def awaitAnyTermination(): Unit
def awaitAnyTermination(timeoutMs: Long): Boolean
}
abstract class ExperimentalMethods extends Serializable
abstract class ExecutionListenerManager extends Serializable {
def register(listener: QueryExecutionListener): Unit
def unregister(listener: QueryExecutionListener): Unit
def clear(): Unit
}
abstract class TimeTravel extends Serializable
abstract class Configuration extends Serializable
abstract class SessionCatalog extends Serializable
abstract class SQLConf extends Serializable
abstract class FunctionRegistry extends Serializable
abstract class Analyzer extends Serializable
abstract class Optimizer extends Serializable
abstract class SparkPlanner extends Serializable
abstract class CacheManager extends Serializable
abstract class SQLListener extends Serializable
abstract class ExternalCatalog extends Serializable
abstract class GlobalTempViewManager extends Serializable
abstract class QueryExecutionListener extends Serializable
abstract class RDD[T] extends Serializable {
def collect(): Array[T]
def count(): Long
def first(): T
def take(num: Int): Array[T]
def foreach(f: T => Unit): Unit
def map[U: ClassTag](f: T => U): RDD[U]
def filter(f: T => Boolean): RDD[T]
def cache(): RDD[T]
def persist(): RDD[T]
def unpersist(): RDD[T]
}