or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

data-types.md · encoders.md · error-handling.md · index.md · row-operations.md · streaming-operations.md · utilities.md
tile.json

docs/row-operations.md

Row Operations

Structured data representation and manipulation with type-safe access methods for distributed data processing. Row objects represent individual records in Spark SQL operations.

Capabilities

Row Interface

Core interface for representing structured data records.

/**
 * Represents one row of output from a relational operator
 */
trait Row extends Serializable {
  /** Number of elements in the row */
  def length: Int
  
  /** Alias for length */
  def size: Int
  
  /** Schema for the row (can be null) */
  def schema: StructType
  
  /** Returns value at position i */
  def apply(i: Int): Any
  
  /** Returns value at position i */
  def get(i: Int): Any
  
  /** Checks if value at position i is null */
  def isNullAt(i: Int): Boolean
  
  /** Generic accessor with type casting */
  def getAs[T](i: Int): T
  
  /** Generic accessor by field name */
  def getAs[T](fieldName: String): T
  
  /** Get field index by name */
  def fieldIndex(name: String): Int
  
  /** Make a copy of the row */
  def copy(): Row
  
  /** Convert row to sequence */
  def toSeq: Seq[Any]
  
  /** Get multiple field values as a map by field names */
  def getValuesMap[T](fieldNames: Seq[String]): Map[String, T]
  
  /** Check if any field in the row is null */
  def anyNull: Boolean
  
  /** Compact JSON representation */
  def json: String
  
  /** Pretty JSON representation */
  def prettyJson: String
  
  /** String representation without separator */
  def mkString: String
  
  /** String representation with separator */
  def mkString(sep: String): String
  
  /** String representation with start, separator, and end */
  def mkString(start: String, sep: String, end: String): String
}

Primitive Type Accessors

Type-safe accessors for primitive data types.

trait Row extends Serializable {
  // Boolean access
  def getBoolean(i: Int): Boolean
  
  // Numeric accessors
  def getByte(i: Int): Byte
  def getShort(i: Int): Short
  def getInt(i: Int): Int
  def getLong(i: Int): Long
  def getFloat(i: Int): Float
  def getDouble(i: Int): Double
  
  // String and binary
  def getString(i: Int): String
  
  // Decimal numbers
  def getDecimal(i: Int): java.math.BigDecimal
}

Date and Time Accessors

Specialized accessors for temporal data types.

trait Row extends Serializable {
  // Date types
  def getDate(i: Int): java.sql.Date
  def getLocalDate(i: Int): java.time.LocalDate
  
  // Timestamp types  
  def getTimestamp(i: Int): java.sql.Timestamp
  def getInstant(i: Int): java.time.Instant
}

Collection Accessors

Accessors for complex collection types.

trait Row extends Serializable {
  // Sequence/List access
  def getSeq[T](i: Int): Seq[T]
  def getList[T](i: Int): java.util.List[T]
  
  // Map access
  def getMap[K, V](i: Int): scala.collection.Map[K, V]
  def getJavaMap[K, V](i: Int): java.util.Map[K, V]
  
  // Nested struct access
  def getStruct(i: Int): Row
}

Row Factory Methods

Factory methods for creating Row instances.

object Row {
  /** Create row from variable arguments */
  def apply(values: Any*): Row
  
  /** Create row from sequence */
  def fromSeq(values: Seq[Any]): Row
  
  /** Create row from tuple */
  def fromTuple(tuple: Product): Row
  
  /** Pattern matching support */
  def unapplySeq(row: Row): Some[Seq[Any]]
  
  /** Empty row singleton */
  def empty: Row
}

Usage Examples

Creating rows:

import org.apache.spark.sql.Row

// Create from individual values
val row1 = Row("Alice", 25, true, 55000.50)

// Create from sequence
val values = Seq("Bob", 30, false, 65000.75)
val row2 = Row.fromSeq(values)

// Create from tuple
val tuple = ("Charlie", 35, true, 75000.00)
val row3 = Row.fromTuple(tuple)

// Empty row
val emptyRow = Row.empty

Accessing row data by position:

val row = Row("Alice", 25, true, 55000.50, null)

// Basic access
val name: String = row.getString(0)
val age: Int = row.getInt(1) 
val active: Boolean = row.getBoolean(2)
val salary: Double = row.getDouble(3)

// Check for nulls
val hasNullValue: Boolean = row.isNullAt(4)

// Generic access with casting
val nameGeneric: String = row.getAs[String](0)
val ageGeneric: Int = row.getAs[Int](1)

// Raw access (returns Any)
val rawName: Any = row.get(0)
val rawAge: Any = row(1) // shorthand for get(1)

Accessing row data by field name:

import org.apache.spark.sql.types._

// Create schema
val schema = StructType(Array(
  StructField("name", StringType, false),
  StructField("age", IntegerType, false),
  StructField("active", BooleanType, false),
  StructField("salary", DoubleType, true)
))

// Assuming row has schema attached
val name: String = row.getAs[String]("name")
val age: Int = row.getAs[Int]("age")
val salary: Double = row.getAs[Double]("salary")

// Get field index
val nameIndex: Int = row.fieldIndex("name")

Working with complex data types:

// Row with nested data
val complexRow = Row(
  "Alice",
  Seq("reading", "hiking", "coding"),
  Map("home" -> "123-456-7890", "work" -> "098-765-4321"),
  Row("123 Main St", "Anytown", "12345") // nested struct
)

// Access collections
val hobbies: Seq[String] = complexRow.getSeq[String](1)
val phones: Map[String, String] = complexRow.getMap[String, String](2)

// Access nested struct
val address: Row = complexRow.getStruct(3)
val street: String = address.getString(0)
val city: String = address.getString(1)
val zip: String = address.getString(2)

// Access with Java collections
val hobbiesList: java.util.List[String] = complexRow.getList[String](1)
val phonesMap: java.util.Map[String, String] = complexRow.getJavaMap[String, String](2)

Row introspection and conversion:

val row = Row("Alice", 25, true)

// Basic properties
val length: Int = row.length        // 3
val size: Int = row.size           // 3  

// Convert to other formats
val sequence: Seq[Any] = row.toSeq
val jsonCompact: String = row.json
val jsonPretty: String = row.prettyJson

// Copy row
val rowCopy: Row = row.copy()

// Get values as map by field names
val fieldNames = Seq("name", "age")
val valuesMap: Map[String, Any] = row.getValuesMap[Any](fieldNames)
println(s"Values: $valuesMap") // Map("name" -> "Alice", "age" -> 25)

// Check if any field is null
val hasAnyNull: Boolean = row.anyNull
println(s"Row has null values: $hasAnyNull")

// Pattern matching
row match {
  case Row(name: String, age: Int, active: Boolean) =>
    println(s"Person: $name, Age: $age, Active: $active")
  case _ => 
    println("Unexpected row structure")
}

// Destructuring with unapplySeq  
val Row(name, age, active) = row

Working with nullable values:

val rowWithNulls = Row("Alice", null, 25, null)

// Safe null checking
if (!rowWithNulls.isNullAt(1)) {
  val middleName: String = rowWithNulls.getString(1)
} else {
  println("Middle name is null")
}

if (!rowWithNulls.isNullAt(3)) {
  val bonus: Double = rowWithNulls.getDouble(3)
} else {
  println("Bonus is null")
}

// Using Option-like patterns with getAs
val maybeBonus = if (rowWithNulls.isNullAt(3)) None else Some(rowWithNulls.getAs[Double](3))

Error handling:

val row = Row("Alice", 25)

try {
  // This will throw IndexOutOfBoundsException
  val nonExistent = row.getString(5)
} catch {
  case _: IndexOutOfBoundsException => 
    println("Field index out of bounds")
}

try {
  // This will throw ClassCastException if types don't match
  val wrongType: Boolean = row.getAs[Boolean](1) // age field as Boolean
} catch {
  case _: ClassCastException => 
    println("Type casting failed")
}