Structured data representation and manipulation with type-safe access methods for distributed data processing. Row objects represent individual records in Spark SQL operations.
Core interface for representing structured data records.
/**
* Represents one row of output from a relational operator
*/
trait Row extends Serializable {
/** Number of elements in the row */
def length: Int
/** Alias for length */
def size: Int
/** Schema for the row (can be null) */
def schema: StructType
/** Returns value at position i */
def apply(i: Int): Any
/** Returns value at position i */
def get(i: Int): Any
/** Checks if value at position i is null */
def isNullAt(i: Int): Boolean
/** Generic accessor with type casting */
def getAs[T](i: Int): T
/** Generic accessor by field name */
def getAs[T](fieldName: String): T
/** Get field index by name */
def fieldIndex(name: String): Int
/** Make a copy of the row */
def copy(): Row
/** Convert row to sequence */
def toSeq: Seq[Any]
/** Get multiple field values as a map by field names */
def getValuesMap[T](fieldNames: Seq[String]): Map[String, T]
/** Check if any field in the row is null */
def anyNull: Boolean
/** Compact JSON representation */
def json: String
/** Pretty JSON representation */
def prettyJson: String
/** String representation without separator */
def mkString: String
/** String representation with separator */
def mkString(sep: String): String
/** String representation with start, separator, and end */
def mkString(start: String, sep: String, end: String): String
}
Type-safe accessors for primitive data types.
trait Row extends Serializable {
// Boolean access
def getBoolean(i: Int): Boolean
// Numeric accessors
def getByte(i: Int): Byte
def getShort(i: Int): Short
def getInt(i: Int): Int
def getLong(i: Int): Long
def getFloat(i: Int): Float
def getDouble(i: Int): Double
// String
def getString(i: Int): String
// Decimal numbers
def getDecimal(i: Int): java.math.BigDecimal
}
Specialized accessors for temporal data types.
trait Row extends Serializable {
// Date types
def getDate(i: Int): java.sql.Date
def getLocalDate(i: Int): java.time.LocalDate
// Timestamp types
def getTimestamp(i: Int): java.sql.Timestamp
def getInstant(i: Int): java.time.Instant
}
Accessors for complex collection types.
trait Row extends Serializable {
// Sequence/List access
def getSeq[T](i: Int): Seq[T]
def getList[T](i: Int): java.util.List[T]
// Map access
def getMap[K, V](i: Int): scala.collection.Map[K, V]
def getJavaMap[K, V](i: Int): java.util.Map[K, V]
// Nested struct access
def getStruct(i: Int): Row
}
Factory methods for creating Row instances.
object Row {
/** Create row from variable arguments */
def apply(values: Any*): Row
/** Create row from sequence */
def fromSeq(values: Seq[Any]): Row
/** Create row from tuple */
def fromTuple(tuple: Product): Row
/** Pattern matching support */
def unapplySeq(row: Row): Some[Seq[Any]]
/** Empty row singleton */
def empty: Row
}
Creating rows:
import org.apache.spark.sql.Row
// Variadic factory: every argument becomes one field, in order
val row1: Row = Row("Alice", 25, true, 55000.50)
// Build a row from an existing sequence of field values
val values: Seq[Any] = Seq("Bob", 30, false, 65000.75)
val row2: Row = Row.fromSeq(values)
// Build a row from a tuple (any Product): one field per element
val tuple: (String, Int, Boolean, Double) = ("Charlie", 35, true, 75000.00)
val row3: Row = Row.fromTuple(tuple)
// Empty row
val emptyRow = Row.empty
Accessing row data by position:
val row: Row = Row("Alice", 25, true, 55000.50, null)
// Typed getters: one call per field position, each returning the declared type
val name: String = row.getString(0)
val age: Int = row.getInt(1)
val active: Boolean = row.getBoolean(2)
val salary: Double = row.getDouble(3)
// Null test for a single position (the fifth field above is null)
val hasNullValue: Boolean = row.isNullAt(4)
// Generic accessor: the type parameter selects the type the value is cast to
val nameGeneric: String = row.getAs[String](0)
val ageGeneric: Int = row.getAs[Int](1)
// Untyped access: the value comes back as Any
val rawName: Any = row.get(0)
val rawAge: Any = row(1) // shorthand for get(1)
Accessing row data by field name:
import org.apache.spark.sql.types._
// Create schema
val schema = StructType(Array(
  StructField("name", StringType, nullable = false),
  StructField("age", IntegerType, nullable = false),
  StructField("active", BooleanType, nullable = false),
  StructField("salary", DoubleType, nullable = true)
))
// Field-name access only works on a row that carries a schema. Rows built with
// Row(...) have no schema, so getAs(fieldName) and fieldIndex throw
// UnsupportedOperationException on them. Rows collected from a DataFrame already
// have their schema attached; here one is attached explicitly for the example.
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
val row: Row = new GenericRowWithSchema(Array[Any]("Alice", 25, true, 55000.50), schema)
val name: String = row.getAs[String]("name")
val age: Int = row.getAs[Int]("age")
// "salary" is nullable: getAs with a primitive type yields the zero value (0.0) for a
// null cell, so test row.isNullAt(row.fieldIndex("salary")) first when null matters.
val salary: Double = row.getAs[Double]("salary")
// Get field index
val nameIndex: Int = row.fieldIndex("name")
Working with complex data types:
// Row with nested data
val complexRow = Row(
  "Alice",
  Seq("reading", "hiking", "coding"),
  Map("home" -> "123-456-7890", "work" -> "098-765-4321"),
  Row("123 Main St", "Anytown", "12345") // nested struct
)
// Access collections
val hobbies: Seq[String] = complexRow.getSeq[String](1)
// getMap returns scala.collection.Map, which is not a subtype of the default
// (immutable) Map, so the annotation must name scala.collection.Map explicitly.
val phones: scala.collection.Map[String, String] = complexRow.getMap[String, String](2)
// Access nested struct
val address: Row = complexRow.getStruct(3)
val street: String = address.getString(0)
val city: String = address.getString(1)
val zip: String = address.getString(2)
// Access with Java collections
val hobbiesList: java.util.List[String] = complexRow.getList[String](1)
val phonesMap: java.util.Map[String, String] = complexRow.getJavaMap[String, String](2)
Row introspection and conversion:
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
import org.apache.spark.sql.types._

// json, prettyJson and getValuesMap resolve fields by name, so the row must carry a
// schema. A row built with Row("Alice", 25, true) has none: json/prettyJson fail with
// IllegalArgumentException and getValuesMap with UnsupportedOperationException.
// Rows collected from a DataFrame already have a schema; here one is attached explicitly.
val schema = StructType(Array(
  StructField("name", StringType, nullable = false),
  StructField("age", IntegerType, nullable = false),
  StructField("active", BooleanType, nullable = false)
))
val row: Row = new GenericRowWithSchema(Array[Any]("Alice", 25, true), schema)
// Basic properties
val length: Int = row.length // 3
val size: Int = row.size // 3
// Convert to other formats
val sequence: Seq[Any] = row.toSeq
val jsonCompact: String = row.json
val jsonPretty: String = row.prettyJson
// Copy row
val rowCopy: Row = row.copy()
// Get values as map by field names
val fieldNames = Seq("name", "age")
val valuesMap: Map[String, Any] = row.getValuesMap[Any](fieldNames)
println(s"Values: $valuesMap") // Map("name" -> "Alice", "age" -> 25)
// Check if any field is null
val hasAnyNull: Boolean = row.anyNull
println(s"Row has null values: $hasAnyNull")
// Pattern matching
row match {
  case Row(name: String, age: Int, active: Boolean) =>
    println(s"Person: $name, Age: $age, Active: $active")
  case _ =>
    println("Unexpected row structure")
}
// Destructuring with unapplySeq
val Row(name, age, active) = row
Working with nullable values:
val rowWithNulls = Row("Alice", null, 25, null)
// Test each nullable position with isNullAt before calling a typed getter on it
if (rowWithNulls.isNullAt(1)) {
  println("Middle name is null")
} else {
  val middleName: String = rowWithNulls.getString(1)
}
if (rowWithNulls.isNullAt(3)) {
  println("Bonus is null")
} else {
  val bonus: Double = rowWithNulls.getDouble(3)
}
// Using Option-like patterns with getAs
val maybeBonus = if (rowWithNulls.isNullAt(3)) None else Some(rowWithNulls.getAs[Double](3))
Error handling:
val row = Row("Alice", 25)
// Reading a position past the last field of a two-field row
try {
  val nonExistent = row.getString(5)
} catch {
  case _: IndexOutOfBoundsException => println("Field index out of bounds")
}
// Reading a field as the wrong type: position 1 holds the age, not a Boolean
try {
  val wrongType: Boolean = row.getAs[Boolean](1)
} catch {
  case _: ClassCastException => println("Type casting failed")
}