Catalyst is a library for manipulating relational query plans, used as the foundation for Spark SQL's query optimizer and execution engine.
Core interface for working with structured row data in Spark Catalyst, providing both generic access and type-safe methods for data manipulation.
The Row trait provides the primary interface for accessing structured data in Catalyst.
/**
* Represents one row of output from a relational operator.
* Allows both generic access by ordinal and native primitive access.
*/
trait Row extends Serializable {
/** Number of elements in the Row */
def size: Int
/** Number of elements in the Row */
def length: Int
/** Schema for the row - returns null by default */
def schema: StructType = null
/** Returns the value at position i */
def apply(i: Int): Any
/** Returns the value at position i */
def get(i: Int): Any
/** Checks whether the value at position i is null */
def isNullAt(i: Int): Boolean
/** Make a copy of the current Row object */
def copy(): Row
/** Returns true if there are any NULL values in this row */
def anyNull: Boolean
/** Return a Scala Seq representing the row */
def toSeq: Seq[Any]
}

Usage Examples:
import org.apache.spark.sql._
// Create a Row from values
val row = Row(1, "Alice", true, null)
// Access values by position
val id = row.getInt(0) // 1
val name = row.getString(1) // "Alice"
val active = row.getBoolean(2) // true
val value = row.get(3) // null
// Check for null values
val isNull = row.isNullAt(3) // true
// Get row size and convert to sequence
val size = row.length // 4
val seq = row.toSeq // Seq(1, "Alice", true, null)

Type-safe accessors for primitive values with automatic casting. All primitive accessors internally use the getAnyValAs method, which performs null checking and throws a NullPointerException when the value is null.
/**
* Returns the value at position i as a primitive boolean.
* @throws ClassCastException when data type does not match
* @throws NullPointerException when value is null
*/
def getBoolean(i: Int): Boolean
/**
* Returns the value at position i as a primitive byte.
* @throws ClassCastException when data type does not match
* @throws NullPointerException when value is null
*/
def getByte(i: Int): Byte
/**
* Returns the value at position i as a primitive short.
* @throws ClassCastException when data type does not match
* @throws NullPointerException when value is null
*/
def getShort(i: Int): Short
/**
* Returns the value at position i as a primitive int.
* @throws ClassCastException when data type does not match
* @throws NullPointerException when value is null
*/
def getInt(i: Int): Int
/**
* Returns the value at position i as a primitive long.
* @throws ClassCastException when data type does not match
* @throws NullPointerException when value is null
*/
def getLong(i: Int): Long
/**
* Returns the value at position i as a primitive float.
* @throws ClassCastException when data type does not match
* @throws NullPointerException when value is null
*/
def getFloat(i: Int): Float
/**
* Returns the value at position i as a primitive double.
* @throws ClassCastException when data type does not match
* @throws NullPointerException when value is null
*/
def getDouble(i: Int): Double

Accessors for object types, including strings, decimals, dates, and timestamps.
/**
* Returns the value at position i as a String object.
* @throws ClassCastException when data type does not match
*/
def getString(i: Int): String
/**
* Returns the value at position i of decimal type as java.math.BigDecimal.
* @throws ClassCastException when data type does not match
*/
def getDecimal(i: Int): java.math.BigDecimal
/**
* Returns the value at position i of date type as java.sql.Date.
* @throws ClassCastException when data type does not match
*/
def getDate(i: Int): java.sql.Date
/**
* Returns the value at position i of timestamp type as java.sql.Timestamp.
* @throws ClassCastException when data type does not match
*/
def getTimestamp(i: Int): java.sql.Timestamp

Accessors for complex types, including arrays, maps, and structs.
/**
* Returns the value at position i of array type as a Scala Seq.
* @throws ClassCastException when data type does not match
*/
def getSeq[T](i: Int): Seq[T]
/**
* Returns the value at position i of array type as java.util.List.
* @throws ClassCastException when data type does not match
*/
def getList[T](i: Int): java.util.List[T]
/**
* Returns the value at position i of map type as a Scala Map.
* @throws ClassCastException when data type does not match
*/
def getMap[K, V](i: Int): scala.collection.Map[K, V]
/**
* Returns the value at position i of map type as java.util.Map.
* @throws ClassCastException when data type does not match
*/
def getJavaMap[K, V](i: Int): java.util.Map[K, V]
/**
* Returns the value at position i of struct type as a Row object.
* @throws ClassCastException when data type does not match
*/
def getStruct(i: Int): Row

Generic type-safe accessors and field-name-based access.
/**
* Returns the value at position i with generic type casting.
* For primitive types, if the value is null it returns the 'zero value' specific to that
* primitive type (e.g. 0 for Int); use isNullAt to make sure the value is not null.
* @throws ClassCastException when data type does not match
*/
def getAs[T](i: Int): T
/**
* Returns the value of a given fieldName.
* @throws UnsupportedOperationException when schema is not defined
* @throws IllegalArgumentException when fieldName does not exist
* @throws ClassCastException when data type does not match
*/
def getAs[T](fieldName: String): T
/**
 * Resolves a field name to its ordinal position within the row.
 *
 * This default implementation has no schema to consult, so it fails
 * unconditionally with an UnsupportedOperationException. Implementations
 * that carry a schema presumably override it (not shown here).
 *
 * @param name the field name to look up
 * @return the index of the named field
 * @throws UnsupportedOperationException when schema is not defined
 *         ("fieldIndex on a Row without schema is undefined.")
 * @throws IllegalArgumentException when fieldName does not exist
 */
def fieldIndex(name: String): Int =
  throw new UnsupportedOperationException("fieldIndex on a Row without schema is undefined.")
/**
* Returns a Map(name -> value) for the requested fieldNames
* @throws UnsupportedOperationException when schema is not defined
* @throws IllegalArgumentException when fieldName does not exist
* @throws ClassCastException when data type does not match
*/
def getValuesMap[T](fieldNames: Seq[String]): Map[String, T]

Factory methods for creating Row instances from various data sources.
object Row {
/**
* Pattern matching extractor for Row objects.
* Example: case Row(key: Int, value: String) => key -> value
*/
def unapplySeq(row: Row): Some[Seq[Any]]
/**
* Create a Row with the given values.
*/
def apply(values: Any*): Row
/**
* Create a Row from a Seq of values.
*/
def fromSeq(values: Seq[Any]): Row
/**
* Create a Row from a tuple.
*/
def fromTuple(tuple: Product): Row
/**
* Merge multiple rows into a single row, one after another.
*/
def merge(rows: Row*): Row
/** Returns an empty row */
val empty: Row
}

Usage Examples:
import org.apache.spark.sql._
// Create rows from different sources
val row1 = Row(1, "Alice", 25.5)
val row2 = Row.fromSeq(Seq(2, "Bob", 30.0))
val row3 = Row.fromTuple((3, "Charlie", 35.5))
// Merge rows
val merged = Row.merge(row1, row2)
// Result: Row with values (1, "Alice", 25.5, 2, "Bob", 30.0)
// Pattern matching
val pairs = Seq(Row(1, "Alice"), Row(2, "Bob")).map {
case Row(id: Int, name: String) => id -> name
}
// Empty row
val empty = Row.empty

Additional utility methods for row manipulation and display.
/** Displays all elements of this sequence in a string (without a separator) */
def mkString: String
/** Displays all elements of this sequence in a string using a separator string */
def mkString(sep: String): String
/**
* Displays all elements of this sequence in a string using start, end, and separator strings
*/
def mkString(start: String, sep: String, end: String): String

Usage Examples:
val row = Row(1, "Alice", true)
// String representations
val str1 = row.mkString // "1Alicetrue"
val str2 = row.mkString(", ") // "1, Alice, true"
val str3 = row.mkString("[", ", ", "]") // "[1, Alice, true]"
// Default toString
val str4 = row.toString() // "[1,Alice,true]"

Install with Tessl CLI
npx tessl i tessl/maven-org-apache-spark--spark-catalyst-2-10