or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

catalog.mdcolumns-functions.mddata-io.mddataset-dataframe.mdindex.mdsession-management.mdstreaming.mdtypes-encoders.mdudfs.md
tile.json

columns-functions.mddocs/

Column Operations and Functions

Column expressions and built-in SQL functions for data transformation. Provides both operator overloading and function-based APIs for creating complex expressions, with support for all SQL operations and advanced analytics functions.

Capabilities

Column Class

Represents a column in a DataFrame that can be composed into complex expressions.

/**
 * A column in a DataFrame that can be composed into complex expressions
 */
class Column {
  /** Equality comparison (SQL-safe null handling) */
  def ===(other: Any): Column
  def =!=(other: Any): Column
  
  /** Numeric comparison */
  def >(other: Any): Column
  def >=(other: Any): Column
  def <(other: Any): Column
  def <=(other: Any): Column
  
  /** Arithmetic operations */
  def +(other: Any): Column
  def -(other: Any): Column
  def *(other: Any): Column
  def /(other: Any): Column
  def %(other: Any): Column
  
  /** Logical operations */
  def &&(other: Column): Column
  def ||(other: Column): Column
  def unary_!(): Column
  
  /** Null handling */
  def isNull: Column
  def isNotNull: Column
  def isNaN: Column
  
  /** String operations */
  def startsWith(other: Column): Column
  def startsWith(literal: String): Column
  def endsWith(other: Column): Column
  def endsWith(literal: String): Column
  def contains(other: Column): Column
  def contains(literal: String): Column
  def like(literal: String): Column
  def rlike(literal: String): Column
  
  /** Type operations */
  def cast(to: DataType): Column
  def cast(to: String): Column
  
  /** Alias operations */
  def as(alias: String): Column
  def as(alias: Symbol): Column
  def alias(alias: String): Column
  
  /** Sorting */
  def asc: Column
  def asc_nulls_first: Column
  def asc_nulls_last: Column
  def desc: Column
  def desc_nulls_first: Column
  def desc_nulls_last: Column
  
  /** Window operations */
  def over(window: WindowSpec): Column
  
  /** Collection operations */
  def getItem(key: Any): Column
  def getField(fieldName: String): Column
  
  /** Boolean operations for arrays */
  def isin(list: Any*): Column
}

TypedColumn

Column with type information for input and output types.

/**
 * Column with type information
 * @tparam T Input type
 * @tparam U Output type  
 */
class TypedColumn[T, U] extends Column {
  /** Apply to Dataset */
  def apply(dataset: Dataset[T]): Dataset[U]
}

Core Functions

Essential functions for column creation and manipulation.

object functions {
  /** Create column from column name */
  def col(colName: String): Column
  def column(colName: String): Column
  
  /** Create literal column */
  def lit(literal: Any): Column
  def typedLit[T : TypeTag](literal: T): Column
  
  /** Conditional expressions */
  def when(condition: Column, value: Any): Column
  def coalesce(e: Column*): Column
  def greatest(e: Column*): Column
  def least(e: Column*): Column
  def isnull(e: Column): Column
  def isnan(e: Column): Column
  def nanvl(col1: Column, col2: Column): Column
}

Usage Examples:

import org.apache.spark.sql.functions._

val df = spark.table("employees")

// Column operations
val filtered = df.filter(col("age") > 25 && col("department") === "Engineering")
val calculated = df.withColumn("bonus", col("salary") * 0.1)
val sorted = df.orderBy(col("salary").desc, col("name").asc)

// Conditional logic
val categorized = df.withColumn("category",
  when(col("age") < 30, "Young")
    .when(col("age") < 50, "Middle")
    .otherwise("Senior")
)

// Null handling
val cleaned = df.filter(col("email").isNotNull && col("phone").isNotNull)
val filled = df.withColumn("phone", coalesce(col("phone"), lit("N/A")))

Aggregate Functions

Statistical and aggregate functions for data analysis.

object functions {
  /** Count functions */
  def count(e: Column): Column
  def countDistinct(expr: Column, exprs: Column*): Column
  def approx_count_distinct(e: Column): Column
  def approx_count_distinct(e: Column, rsd: Double): Column
  
  /** Basic aggregates */
  def sum(e: Column): Column
  def sumDistinct(e: Column): Column
  def avg(e: Column): Column
  def mean(e: Column): Column
  def min(e: Column): Column
  def max(e: Column): Column
  def first(e: Column): Column
  def first(e: Column, ignoreNulls: Boolean): Column
  def last(e: Column): Column
  def last(e: Column, ignoreNulls: Boolean): Column
  
  /** Statistical functions */
  def stddev(e: Column): Column
  def stddev_pop(e: Column): Column
  def stddev_samp(e: Column): Column
  def variance(e: Column): Column
  def var_pop(e: Column): Column
  def var_samp(e: Column): Column
  def skewness(e: Column): Column
  def kurtosis(e: Column): Column
  def corr(column1: Column, column2: Column): Column
  def covar_pop(column1: Column, column2: Column): Column
  def covar_samp(column1: Column, column2: Column): Column
  
  /** Collection aggregates */
  def collect_list(e: Column): Column
  def collect_set(e: Column): Column
}

Mathematical Functions

Mathematical operations and functions.

object functions {
  /** Basic math */
  def abs(e: Column): Column
  def negate(e: Column): Column
  def sqrt(e: Column): Column
  def cbrt(e: Column): Column
  
  /** Power and logarithmic */
  def pow(l: Column, r: Column): Column
  def pow(l: Column, r: Double): Column
  def exp(e: Column): Column
  def expm1(e: Column): Column
  def log(e: Column): Column
  def log(base: Double, e: Column): Column
  def log10(e: Column): Column
  def log1p(e: Column): Column
  def log2(e: Column): Column
  
  /** Trigonometric */
  def sin(e: Column): Column
  def cos(e: Column): Column
  def tan(e: Column): Column
  def asin(e: Column): Column
  def acos(e: Column): Column
  def atan(e: Column): Column
  def atan2(y: Column, x: Column): Column
  def sinh(e: Column): Column
  def cosh(e: Column): Column
  def tanh(e: Column): Column
  
  /** Rounding */
  def round(e: Column): Column
  def round(e: Column, scale: Int): Column
  def bround(e: Column): Column
  def bround(e: Column, scale: Int): Column
  def ceil(e: Column): Column
  def floor(e: Column): Column
  
  /** Random */
  def rand(): Column
  def rand(seed: Long): Column
  def randn(): Column
  def randn(seed: Long): Column
}

String Functions

String manipulation and processing functions.

object functions {
  /** String operations */
  def length(e: Column): Column
  def char_length(e: Column): Column
  def bit_length(e: Column): Column
  def octet_length(e: Column): Column
  
  /** Case conversion */
  def upper(e: Column): Column
  def lower(e: Column): Column
  def initcap(e: Column): Column
  
  /** Trimming */
  def trim(e: Column): Column
  def ltrim(e: Column): Column
  def rtrim(e: Column): Column
  
  /** Padding */
  def lpad(str: Column, len: Int, pad: String): Column
  def rpad(str: Column, len: Int, pad: String): Column
  
  /** Concatenation */
  def concat(exprs: Column*): Column
  def concat_ws(sep: String, exprs: Column*): Column
  
  /** Substring operations */
  def substring(str: Column, pos: Int, len: Int): Column
  def substring_index(str: Column, delim: String, count: Int): Column
  def left(str: Column, len: Int): Column
  def right(str: Column, len: Int): Column
  
  /** Pattern matching */
  def regexp_extract(e: Column, exp: String, groupIdx: Int): Column
  def regexp_replace(e: Column, pattern: String, replacement: String): Column
  def split(str: Column, pattern: String): Column
  def split(str: Column, pattern: String, limit: Int): Column
  
  /** String search */
  def instr(str: Column, substring: String): Column
  def locate(substr: String, str: Column): Column
  def locate(substr: String, str: Column, pos: Int): Column
  
  /** Encoding */
  def ascii(e: Column): Column
  def base64(e: Column): Column
  def unbase64(e: Column): Column
  def encode(value: Column, charset: String): Column
  def decode(value: Column, charset: String): Column
  
  /** Format */
  def format_number(x: Column, d: Int): Column
  def format_string(format: String, arguments: Column*): Column
}

Date and Time Functions

Date, time, and timestamp manipulation functions.

object functions {
  /** Current date/time */
  def current_date(): Column
  def current_timestamp(): Column
  def now(): Column
  def unix_timestamp(): Column
  def unix_timestamp(s: Column): Column
  def unix_timestamp(s: Column, p: String): Column
  
  /** Date/time conversion */
  def to_date(e: Column): Column
  def to_date(e: Column, fmt: String): Column
  def to_timestamp(s: Column): Column
  def to_timestamp(s: Column, fmt: String): Column
  def from_unixtime(ut: Column): Column
  def from_unixtime(ut: Column, f: String): Column
  
  /** Date arithmetic */
  def date_add(start: Column, days: Int): Column
  def date_sub(start: Column, days: Int): Column
  def datediff(end: Column, start: Column): Column
  def months_between(end: Column, start: Column): Column
  def months_between(end: Column, start: Column, roundOff: Boolean): Column
  def add_months(start: Column, months: Int): Column
  def next_day(date: Column, dayOfWeek: String): Column
  def last_day(e: Column): Column
  
  /** Date/time extraction */
  def year(e: Column): Column
  def quarter(e: Column): Column
  def month(e: Column): Column
  def dayofmonth(e: Column): Column
  def dayofweek(e: Column): Column
  def dayofyear(e: Column): Column
  def hour(e: Column): Column
  def minute(e: Column): Column
  def second(e: Column): Column
  def weekofyear(e: Column): Column
  
  /** Date/time formatting */
  def date_format(dateExpr: Column, format: String): Column
  def from_utc_timestamp(ts: Column, tz: String): Column
  def to_utc_timestamp(ts: Column, tz: String): Column
  
  /** Time window */
  def window(timeColumn: Column, windowDuration: String): Column
  def window(timeColumn: Column, windowDuration: String, slideDuration: String): Column
  def window(timeColumn: Column, windowDuration: String, slideDuration: String, startTime: String): Column
}

Array and Map Functions

Functions for working with complex data types.

object functions {
  /** Array creation */
  def array(cols: Column*): Column
  def array(colName: String, colNames: String*): Column
  def split(str: Column, pattern: String): Column
  
  /** Array operations */
  def array_contains(column: Column, value: Any): Column
  def array_distinct(e: Column): Column
  def array_except(col1: Column, col2: Column): Column
  def array_intersect(col1: Column, col2: Column): Column
  def array_join(column: Column, delimiter: String): Column
  def array_join(column: Column, delimiter: String, nullReplacement: String): Column
  def array_max(e: Column): Column
  def array_min(e: Column): Column
  def array_position(column: Column, value: Any): Column
  def array_remove(column: Column, element: Any): Column
  def array_repeat(e: Column, count: Int): Column
  def array_sort(e: Column): Column
  def array_union(col1: Column, col2: Column): Column
  def arrays_overlap(a1: Column, a2: Column): Column
  def arrays_zip(e: Column*): Column
  def concat(exprs: Column*): Column
  def element_at(column: Column, extraction: Any): Column
  def explode(e: Column): Column
  def explode_outer(e: Column): Column
  def flatten(e: Column): Column
  def posexplode(e: Column): Column
  def posexplode_outer(e: Column): Column
  def reverse(e: Column): Column
  def sequence(start: Column, stop: Column): Column
  def sequence(start: Column, stop: Column, step: Column): Column
  def shuffle(e: Column): Column
  def size(e: Column): Column
  def slice(x: Column, start: Int, length: Int): Column
  def sort_array(e: Column): Column
  def sort_array(e: Column, asc: Boolean): Column
  
  /** Map creation */
  def map(cols: Column*): Column
  def map_from_arrays(keys: Column, values: Column): Column
  def map_from_entries(e: Column): Column
  
  /** Map operations */
  def map_concat(cols: Column*): Column
  def map_entries(e: Column): Column
  def map_filter(expr: Column, f: (Column, Column) => Column): Column
  def map_keys(e: Column): Column
  def map_values(e: Column): Column
  def map_zip_with(left: Column, right: Column, f: (Column, Column, Column) => Column): Column
  
  /** Struct operations */
  def struct(cols: Column*): Column
  def struct(colName: String, colNames: String*): Column
}

Window Functions

Analytics functions for window-based calculations.

object functions {
  /** Ranking functions */
  def row_number(): Column
  def rank(): Column
  def dense_rank(): Column
  def percent_rank(): Column
  def ntile(n: Int): Column
  def cume_dist(): Column
  
  /** Analytic functions */
  def lag(e: Column, offset: Int): Column
  def lag(e: Column, offset: Int, defaultValue: Any): Column
  def lead(e: Column, offset: Int): Column
  def lead(e: Column, offset: Int, defaultValue: Any): Column
  def first_value(e: Column): Column
  def first_value(e: Column, ignoreNulls: Boolean): Column
  def last_value(e: Column): Column
  def last_value(e: Column, ignoreNulls: Boolean): Column
  def nth_value(e: Column, n: Int): Column
  def nth_value(e: Column, n: Int, ignoreNulls: Boolean): Column
}

/**
 * Window specification for window functions
 */
class WindowSpec {
  def partitionBy(colNames: String*): WindowSpec
  def partitionBy(cols: Column*): WindowSpec
  def orderBy(colNames: String*): WindowSpec
  def orderBy(cols: Column*): WindowSpec
  def rowsBetween(start: Long, end: Long): WindowSpec
  def rangeBetween(start: Long, end: Long): WindowSpec
}

object Window {
  /** Create unbounded window */
  def partitionBy(colNames: String*): WindowSpec
  def partitionBy(cols: Column*): WindowSpec
  def orderBy(colNames: String*): WindowSpec
  def orderBy(cols: Column*): WindowSpec
  
  /** Window frame boundaries */
  val unboundedPreceding: Long
  val unboundedFollowing: Long  
  val currentRow: Long
}

Usage Examples:

import org.apache.spark.sql.functions._
import org.apache.spark.sql.expressions.Window

val sales = spark.table("sales")

// Window functions
val windowSpec = Window.partitionBy("region").orderBy(col("date"))

val salesWithRank = sales.withColumn("rank", 
  row_number().over(windowSpec)
)

val salesWithLag = sales.withColumn("prev_amount", 
  lag(col("amount"), 1).over(windowSpec)
)

// Running totals
val runningTotal = sales.withColumn("running_total",
  sum(col("amount")).over(windowSpec.rowsBetween(Window.unboundedPreceding, Window.currentRow))
)

// Moving averages  
val movingAvg = sales.withColumn("moving_avg_3",
  avg(col("amount")).over(windowSpec.rowsBetween(-2, 0))
)