Spark SQL API module providing core SQL data types, rows, and foundational APIs for Spark SQL operations
```bash
npx @tessl/cli install tessl/maven-org-apache-spark--spark-sql-api_2-12@3.5.0
```

Apache Spark SQL API provides the core SQL data types, row representations, and foundational APIs for Spark SQL operations. This library serves as the foundation for DataFrame and Dataset operations, SQL query execution, and structured streaming in Apache Spark's distributed computing framework.
Maven: `org.apache.spark:spark-sql-api_2.12:3.5.6`

```scala
// Core row and type imports
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
// Streaming state management
import org.apache.spark.sql.streaming.GroupState
// Encoding support
import org.apache.spark.sql.Encoder
// Error handling
import org.apache.spark.sql.AnalysisException
```

```scala
import org.apache.spark.sql.{Row, AnalysisException}
import org.apache.spark.sql.types._
// Create a schema for structured data
val schema = StructType(Array(
  StructField("name", StringType, nullable = false),
  StructField("age", IntegerType, nullable = false),
  StructField("salary", DecimalType(10, 2), nullable = true)
))
// Create rows of data
val row1 = Row("Alice", 25, BigDecimal("55000.00"))
val row2 = Row.fromSeq(Seq("Bob", 30, BigDecimal("65000.00")))
// Access row data (field-name lookups such as getAs[String]("name") require a
// Row that carries a schema, e.g. one returned from a DataFrame; rows built
// with Row(...) support positional access only)
val name: String = row1.getAs[String](0)
val age: Int = row1.getInt(1)
val hasNullSalary: Boolean = row1.isNullAt(2)
// Work with complex data types
val arrayType = ArrayType(StringType, containsNull = true)
val mapType = MapType(StringType, IntegerType, valueContainsNull = false)
val nestedSchema = StructType(Array(
  StructField("addresses", arrayType, nullable = true),
  StructField("scores", mapType, nullable = false)
))
```

The Spark SQL API is built around several key components:
A comprehensive type system including primitives, collections, and complex nested structures, essential for defining schemas and working with structured data.

```scala
// Base type hierarchy
abstract class DataType extends AbstractDataType
abstract class AbstractDataType
// Primitive types (singleton case objects; in the Spark source each extends
// a same-named, non-constructible class, e.g. `class StringType private()`)
case object StringType extends StringType
case object IntegerType extends IntegerType
case object LongType extends LongType
case object DoubleType extends DoubleType
case object BooleanType extends BooleanType
// Complex types
case class DecimalType(precision: Int, scale: Int) extends FractionalType
case class ArrayType(elementType: DataType, containsNull: Boolean) extends DataType
case class MapType(keyType: DataType, valueType: DataType, valueContainsNull: Boolean) extends DataType
case class StructType(fields: Array[StructField]) extends DataType
```
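These types can be exercised directly. A small sketch using only public `StructType`/`DataType` methods (`treeString`, `toDDL`, and the JSON round-trip):

```scala
import org.apache.spark.sql.types._

val example = StructType(Array(
  StructField("id", LongType, nullable = false),
  StructField("tags", ArrayType(StringType, containsNull = true), nullable = true),
  StructField("attrs", MapType(StringType, IntegerType, valueContainsNull = false), nullable = true)
))

// Human-readable tree rendering of the schema
println(example.treeString)

// DDL form, e.g. "id BIGINT NOT NULL, tags ARRAY<STRING>, ..."
println(example.toDDL)

// Schemas serialize to JSON and parse back into an equal StructType
val restored = DataType.fromJson(example.json).asInstanceOf[StructType]
assert(restored == example)
```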
Structured data representation and manipulation with type-safe access methods for distributed data processing.

```scala
trait Row extends Serializable {
  def length: Int
  def apply(i: Int): Any
  def get(i: Int): Any
  def isNullAt(i: Int): Boolean
  def getAs[T](i: Int): T
  def getAs[T](fieldName: String): T
  def getString(i: Int): String
  def getInt(i: Int): Int
  def getLong(i: Int): Long
  def getDouble(i: Int): Double
  def getBoolean(i: Int): Boolean
}

object Row {
  def apply(values: Any*): Row
  def fromSeq(values: Seq[Any]): Row
  def fromTuple(tuple: Product): Row
}
```
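Rows support typed positional access, null checks, and deconstruction by pattern matching via `Row.unapplySeq`. A brief sketch:

```scala
import org.apache.spark.sql.Row

val row = Row("Alice", 25, null)

// Typed positional access
val name = row.getString(0)
val age  = row.getInt(1)

// Null-safe access: check before reading a possibly-null column
val salary: Option[BigDecimal] =
  if (row.isNullAt(2)) None else Some(row.getAs[BigDecimal](2))

// Rows can also be deconstructed by pattern matching
row match {
  case Row(n: String, a: Int, _) => println(s"$n is $a years old")
}
```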
Stateful operations for complex streaming analytics with timeout support and watermark handling.

```scala
trait GroupState[S] extends LogicalGroupState[S] {
  def exists: Boolean
  def get: S
  def getOption: Option[S]
  def update(newState: S): Unit
  def remove(): Unit
  def hasTimedOut: Boolean
  def setTimeoutDuration(durationMs: Long): Unit
  def setTimeoutTimestamp(timestampMs: Long): Unit
  def getCurrentWatermarkMs(): Long
  def getCurrentProcessingTimeMs(): Long
}
```
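A `GroupState` is handed to the user function in `Dataset.groupByKey(...).mapGroupsWithState(...)`. Below is a minimal sketch of such an update function; the `Click` and `Session` case classes are illustrative, and it assumes the query was started with a processing-time timeout:

```scala
import org.apache.spark.sql.streaming.GroupState

case class Click(user: String, count: Long)   // illustrative input type
case class Session(totalClicks: Long)         // illustrative state type

def updateSession(
    user: String,
    clicks: Iterator[Click],
    state: GroupState[Session]): Session = {
  if (state.hasTimedOut) {
    // Timeout fired for this key: emit the final state and clear it
    val finished = state.get
    state.remove()
    finished
  } else {
    val current = state.getOption.getOrElse(Session(0L))
    val updated = Session(current.totalClicks + clicks.map(_.count).sum)
    state.update(updated)
    // Only legal when the query uses GroupStateTimeout.ProcessingTimeTimeout
    state.setTimeoutDuration(30 * 60 * 1000L)
    updated
  }
}
```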
Type-safe conversion between JVM objects and Spark SQL representations for distributed serialization.

```scala
trait Encoder[T] extends Serializable {
  def schema: StructType
  def clsTag: ClassTag[T]
}

trait AgnosticEncoder[T] extends Encoder[T] {
  def isPrimitive: Boolean
  def nullable: Boolean
  def dataType: DataType
}
```
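Encoders are usually obtained from the `org.apache.spark.sql.Encoders` factory (shipped with the companion spark-sql module) or implicitly via `spark.implicits._`, rather than implemented by hand. A sketch:

```scala
import org.apache.spark.sql.{Encoder, Encoders}

case class Person(name: String, age: Int)

// Derive an encoder for a case class; the schema is inferred from its fields
val personEncoder: Encoder[Person] = Encoders.product[Person]

println(personEncoder.schema.treeString)   // name: string, age: integer
println(personEncoder.clsTag.runtimeClass) // class Person

// Built-in encoders cover primitives and common types
val longEncoder: Encoder[java.lang.Long] = Encoders.LONG
val stringEncoder: Encoder[String] = Encoders.STRING
```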
Structured exception handling with detailed error information for query analysis and execution.

```scala
class AnalysisException(
    message: String,
    line: Option[Int] = None,
    startPosition: Option[Int] = None,
    errorClass: Option[String] = None,
    messageParameters: Map[String, String] = Map.empty,
    context: Array[QueryContext] = Array.empty
) extends Exception with SparkThrowable {
  def withPosition(origin: Origin): AnalysisException
  def getSimpleMessage: String
}
```
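`AnalysisException` is what query analysis raises for unresolved columns, missing tables, and similar errors. A sketch of catching it, assuming an active `SparkSession` named `spark`:

```scala
import org.apache.spark.sql.AnalysisException

try {
  spark.sql("SELECT no_such_column FROM some_table")
} catch {
  case e: AnalysisException =>
    // Message plus optional position and error-class details
    println(s"Query analysis failed: ${e.getSimpleMessage}")
    e.line.foreach(l => println(s"  at line $l"))
    e.errorClass.foreach(c => println(s"  error class: $c"))
}
```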
Helper utilities for data type conversions and integration with external systems.

```scala
object ArrowUtils {
  def toArrowType(dt: DataType, timeZoneId: String, largeVarTypes: Boolean = false): ArrowType
  def fromArrowType(dt: ArrowType): DataType
  def toArrowSchema(schema: StructType, timeZoneId: String, errorOnDuplicatedFieldNames: Boolean, largeVarTypes: Boolean = false): Schema
  def fromArrowSchema(schema: Schema): StructType
}
```
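A sketch of a schema round-trip through Arrow. Note that in the Spark codebase `ArrowUtils` is intended for internal use (`private[sql]`), so this illustrates the API shape rather than typical application code:

```scala
import org.apache.spark.sql.types._
import org.apache.spark.sql.util.ArrowUtils

val sparkSchema = StructType(Array(
  StructField("id", LongType, nullable = false),
  StructField("name", StringType, nullable = true)
))

// Convert to an Arrow schema; the time zone applies to timestamp columns
val arrowSchema = ArrowUtils.toArrowSchema(
  sparkSchema, timeZoneId = "UTC", errorOnDuplicatedFieldNames = true)

// Round-trip back to a Spark schema (lossless for these simple types)
val restored: StructType = ArrowUtils.fromArrowSchema(arrowSchema)
assert(restored == sparkSchema)
```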