Data Types

Comprehensive type system for Spark SQL including primitives, collections, and complex nested structures. Essential for defining schemas and working with structured data in distributed environments.

Capabilities

Base Type Hierarchy

Foundation classes for all Spark SQL data types.

/**
 * Base class for all Spark SQL data types
 */
abstract class DataType extends AbstractDataType {
  def defaultSize: Int
  def typeName: String
  def json: String
  def prettyJson: String
  def simpleString: String
  def catalogString: String
  def sql: String
  def sameType(other: DataType): Boolean
  def asNullable: DataType
}

/**
 * Base for non-concrete data types
 */
abstract class AbstractDataType {
  def defaultConcreteType: DataType
  def acceptsType(other: DataType): Boolean
  def simpleString: String
}
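
The introspection methods render a type in several formats; a minimal sketch (the commented results reflect standard Spark behavior):

import org.apache.spark.sql.types._

val t: DataType = ArrayType(IntegerType)
t.typeName                                                // "array"
t.simpleString                                            // "array<int>"
t.sql                                                     // "ARRAY<INT>"
t.sameType(ArrayType(IntegerType, containsNull = false))  // true: nullability is ignored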

Factory and parsing methods:

object DataType {
  def fromDDL(ddl: String): DataType
  def fromJson(json: String): DataType
  def parseTypeWithFallback(
    schema: String, 
    parser: String => DataType, 
    fallbackParser: String => DataType
  ): DataType
  def equalsIgnoreNullability(left: DataType, right: DataType): Boolean
  def equalsStructurally(from: DataType, to: DataType, ignoreNullability: Boolean): Boolean
}
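
These helpers round-trip with the renderings on DataType itself; a short sketch, assuming standard Spark behavior:

import org.apache.spark.sql.types._

val ddlType  = DataType.fromDDL("STRUCT<a: INT, b: STRING>")
val jsonType = DataType.fromJson(ddlType.json)  // json output parses back to an equal type
assert(ddlType == jsonType)

// Nullability-insensitive comparison
val left  = ArrayType(IntegerType, containsNull = true)
val right = ArrayType(IntegerType, containsNull = false)
DataType.equalsIgnoreNullability(left, right)   // true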

Primitive Data Types

Basic scalar data types for fundamental values.

// String and binary types (each primitive is a singleton case object that
// extends a same-named class, hence the self-referential-looking declarations)
case object StringType extends StringType
case object BinaryType extends BinaryType

// Numeric types
case object ByteType extends ByteType           // 1 byte
case object ShortType extends ShortType         // 2 bytes  
case object IntegerType extends IntegerType     // 4 bytes
case object LongType extends LongType           // 8 bytes
case object FloatType extends FloatType         // 4 bytes
case object DoubleType extends DoubleType       // 8 bytes

// Boolean and null
case object BooleanType extends BooleanType
case object NullType extends NullType
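
Each singleton can be used directly wherever a DataType is expected; their renderings follow Spark's SQL names (a quick sketch):

import org.apache.spark.sql.types._

LongType.simpleString     // "bigint"
IntegerType.sql           // "INT"
DoubleType.defaultSize    // 8, matching the byte sizes noted above
StringType.catalogString  // "string"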

Decimal Type

High-precision decimal numbers with configurable precision and scale.

/**
 * Fixed precision decimal numbers
 * @param precision Total number of digits (max 38)
 * @param scale Number of digits after the decimal point (max 38; must not exceed precision)
 */
case class DecimalType(precision: Int, scale: Int) extends FractionalType {
  def isWiderThan(other: DataType): Boolean
  def isTighterThan(other: DataType): Boolean
}

object DecimalType {
  val MAX_PRECISION: Int = 38
  val MAX_SCALE: Int = 38
  val SYSTEM_DEFAULT: DecimalType = DecimalType(38, 18)
  val USER_DEFAULT: DecimalType = DecimalType(10, 0)
  
  def forType(dataType: DataType): DecimalType
  def adjustPrecisionScale(precision: Int, scale: Int): DecimalType
  def is32BitDecimalType(dt: DataType): Boolean
  def is64BitDecimalType(dt: DataType): Boolean
  def isByteArrayDecimalType(dt: DataType): Boolean
}
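
For example, forType picks a decimal wide enough to hold any value of the given integral type, and the width predicates describe the physical representation (a sketch of Spark's default mappings):

import org.apache.spark.sql.types._

DecimalType.forType(IntegerType)                    // DecimalType(10,0): holds any Int
DecimalType.forType(LongType)                       // DecimalType(20,0): holds any Long
DecimalType.is32BitDecimalType(DecimalType(9, 2))   // true: precision <= 9 fits in an Int
DecimalType.is64BitDecimalType(DecimalType(18, 4))  // true: precision <= 18 fits in a Long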

Date and Time Types

Temporal data types for handling dates, timestamps, and time intervals.

// Date and timestamp types
case object DateType extends DateType                   // 4 bytes, date only
case object TimestampType extends TimestampType         // 8 bytes, microsecond precision, session time zone
case object TimestampNTZType extends TimestampNTZType   // 8 bytes, microsecond precision, no time zone

// Interval types
case object CalendarIntervalType extends CalendarIntervalType // 16 bytes

/**
 * Day-time intervals (days, hours, minutes, seconds)
 */
case class DayTimeIntervalType(startField: Byte, endField: Byte) extends DataType

/**
 * Year-month intervals (years, months)  
 */
case class YearMonthIntervalType(startField: Byte, endField: Byte) extends DataType
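
The start and end fields select the units an interval spans. A minimal sketch, assuming the unit constants Spark defines on the companion objects (DAY/HOUR/MINUTE/SECOND and YEAR/MONTH):

import org.apache.spark.sql.types._

// INTERVAL DAY TO SECOND
val dayToSecond = DayTimeIntervalType(DayTimeIntervalType.DAY, DayTimeIntervalType.SECOND)

// INTERVAL YEAR TO MONTH
val yearToMonth = YearMonthIntervalType(YearMonthIntervalType.YEAR, YearMonthIntervalType.MONTH)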

Character Types

Fixed and variable-length character string types.

/**
 * Fixed-length character strings
 * @param length Fixed character length (shorter values are padded with spaces)
 */
case class CharType(length: Int) extends DataType

/**
 * Variable-length character strings  
 * @param length Maximum character length
 */
case class VarcharType(length: Int) extends DataType
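
Both carry a declared length, but with different semantics: CHAR values are space-padded to exactly that length on write, while VARCHAR only enforces an upper bound (a sketch; the typeName rendering is assumed):

import org.apache.spark.sql.types._

val code = CharType(2)       // always exactly 2 characters, space-padded
val name = VarcharType(100)  // at most 100 characters
code.typeName                // "char(2)"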

Collection Types

Array and map types for handling collections of data.

/**
 * Array of elements of the same type
 * @param elementType Type of the array elements
 * @param containsNull Whether the array can contain null values
 */
case class ArrayType(elementType: DataType, containsNull: Boolean) extends DataType

object ArrayType {
  def apply(elementType: DataType): ArrayType = ArrayType(elementType, containsNull = true)
}

/**
 * Key-value mappings
 * @param keyType Type of map keys
 * @param valueType Type of map values  
 * @param valueContainsNull Whether values can be null (map keys are never null)
 */
case class MapType(
  keyType: DataType, 
  valueType: DataType, 
  valueContainsNull: Boolean
) extends DataType
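
Collections nest freely, and the single-argument ArrayType factory defaults containsNull to true (a quick sketch):

import org.apache.spark.sql.types._

val tags   = ArrayType(StringType)  // containsNull = true by default
val scores = MapType(StringType, DoubleType, valueContainsNull = false)
val nested = ArrayType(MapType(StringType, ArrayType(LongType), true), containsNull = false)

scores.catalogString  // "map<string,double>"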

Struct Types

Complex nested structures with named fields.

/**
 * Complex nested structures with named fields
 * @param fields Array of struct fields
 */
case class StructType(fields: Array[StructField]) extends DataType with Seq[StructField] {
  // Field access
  def fieldNames: Array[String]
  def names: Array[String]
  def apply(name: String): StructField
  def apply(names: Set[String]): StructType
  def fieldIndex(name: String): Int
  
  // Schema building - multiple add() overloads
  def add(field: StructField): StructType
  def add(name: String, dataType: DataType): StructType
  def add(name: String, dataType: DataType, nullable: Boolean): StructType
  def add(name: String, dataType: DataType, nullable: Boolean, metadata: Metadata): StructType
  def add(name: String, dataType: DataType, nullable: Boolean, comment: String): StructType
  
  // Schema operations
  def merge(that: StructType, caseSensitive: Boolean = true): StructType
  def toDDL: String
  def treeString: String
  def treeString(maxDepth: Int): String
  def printTreeString(): Unit
  
  // Advanced field operations
  def findNestedField(
    fieldNames: Seq[String],
    includeCollections: Boolean = false,
    resolver: Resolver = _ == _  // case-sensitive name matching by default
  ): Option[(Seq[String], StructField)]
}

object StructType {
  def apply(fields: Seq[StructField]): StructType
  def fromDDL(ddl: String): StructType
  def findMissingFields(
    source: StructType, 
    target: StructType, 
    resolver: Resolver
  ): Option[StructType]
}
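
In practice schemas are often built incrementally with the add() overloads; a short sketch of the builder style:

import org.apache.spark.sql.types._

val schema = new StructType()
  .add("id", LongType, nullable = false)
  .add("name", StringType)            // nullable = true by default
  .add("tags", ArrayType(StringType))

schema.fieldNames.toSeq    // Seq("id", "name", "tags")
schema.fieldIndex("name")  // 1
schema("id")               // StructField(id,LongType,false,{})
schema.printTreeString()   // prints the root / |-- tree rendering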

Struct Fields

Individual fields within struct types.

/**
 * Field within a StructType
 * @param name Field name
 * @param dataType Field data type
 * @param nullable Whether field can be null
 * @param metadata Field metadata
 */
case class StructField(
  name: String, 
  dataType: DataType, 
  nullable: Boolean, 
  metadata: Metadata = Metadata.empty
) {
  // Metadata operations
  def withComment(comment: String): StructField
  def getComment(): Option[String]
  def withCurrentDefaultValue(value: String): StructField  
  def getCurrentDefaultValue(): Option[String]
  def clearCurrentDefaultValue(): StructField
  def toDDL: String
}
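
The comment and default-value helpers store their data in the field's metadata. A sketch (the exact toDDL rendering may vary slightly across Spark versions):

import org.apache.spark.sql.types._

val price = StructField("price", DecimalType(10, 2), nullable = false)
  .withComment("unit price in USD")

price.getComment()  // Some("unit price in USD")
price.toDDL         // e.g. "price DECIMAL(10,2) NOT NULL COMMENT 'unit price in USD'"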

User-Defined Types

Base class for custom data types.

/**
 * Base for custom user-defined data types
 */
abstract class UserDefinedType[UserType >: Null] extends DataType with Serializable {
  def sqlType: DataType
  def serialize(obj: UserType): Any
  def deserialize(datum: Any): UserType
  def userClass: Class[UserType]
  def pyUDT: String
}

/**
 * Registry for user-defined types
 */
object UDTRegistration {
  def register(userClass: String, udtClass: String): Unit
  def exists(userClassName: String): Boolean
  def getUDTFor(userClass: String): Option[Class[_]]
}
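
A minimal end-to-end sketch of a custom type. The Point and PointUDT names are hypothetical, and serialize is simplified: a real implementation would return Catalyst's internal representation (e.g. GenericArrayData) for the declared sqlType:

import org.apache.spark.sql.types._

case class Point(x: Double, y: Double)  // hypothetical domain class

class PointUDT extends UserDefinedType[Point] {
  override def sqlType: DataType = ArrayType(DoubleType, containsNull = false)
  override def userClass: Class[Point] = classOf[Point]
  // Simplified for illustration; see note above about Catalyst's internal ArrayData
  override def serialize(p: Point): Any = Array(p.x, p.y)
  override def deserialize(datum: Any): Point = datum match {
    case a: Array[Double] => Point(a(0), a(1))
  }
}

// Register the UDT by fully qualified class names
UDTRegistration.register(classOf[Point].getName, classOf[PointUDT].getName)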

Metadata Support

Metadata system for attaching additional information to schema fields.

/**
 * Metadata attached to schema fields
 */
class Metadata {
  // Typed getters for metadata values (Metadata is immutable; construct with MetadataBuilder)
  def contains(key: String): Boolean
  def getLong(key: String): Long
  def getDouble(key: String): Double
  def getString(key: String): String
  def getBoolean(key: String): Boolean
  def getMetadata(key: String): Metadata
  def getLongArray(key: String): Array[Long]
  def getDoubleArray(key: String): Array[Double]  
  def getStringArray(key: String): Array[String]
  def getBooleanArray(key: String): Array[Boolean]
  def getMetadataArray(key: String): Array[Metadata]
}

object Metadata {
  val empty: Metadata
  def fromJson(json: String): Metadata
}
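
Metadata instances are immutable; they are built with MetadataBuilder and attached through StructField (a quick sketch):

import org.apache.spark.sql.types._

val md = new MetadataBuilder()
  .putString("comment", "user age in years")
  .putLong("maxValue", 150L)
  .build()

md.getString("comment")  // "user age in years"
val ageField = StructField("age", IntegerType, nullable = true, md)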

Usage Examples

Creating schemas:

import org.apache.spark.sql.types._

// Simple schema
val userSchema = StructType(Array(
  StructField("id", LongType, nullable = false),
  StructField("name", StringType, nullable = false),
  StructField("email", StringType, nullable = true),
  StructField("age", IntegerType, nullable = true)
))

// Complex nested schema
val orderSchema = StructType(Array(
  StructField("orderId", StringType, false),
  StructField("items", ArrayType(StructType(Array(
    StructField("productId", StringType, false),
    StructField("quantity", IntegerType, false),
    StructField("price", DecimalType(10, 2), false)
  )), containsNull = false), false),
  StructField("metadata", MapType(StringType, StringType, true), true)
))

Working with decimal precision:

// High precision financial calculations
val moneyType = DecimalType(19, 4)  // 19 digits total, 4 after decimal
val percentType = DecimalType(5, 4) // 5 digits total, 4 after decimal (9.9999 max)

// System defaults
val systemDecimal = DecimalType.SYSTEM_DEFAULT // DecimalType(38, 18)
val userDecimal = DecimalType.USER_DEFAULT     // DecimalType(10, 0)

Schema parsing:

// Parse from DDL string
val parsedSchema = StructType.fromDDL(
  """
  id BIGINT,
  name STRING,
  scores ARRAY<DOUBLE>,
  profile MAP<STRING, STRING>
  """
)

// Parse individual types
val arrayType = DataType.fromDDL("ARRAY<STRING>") 
val mapType = DataType.fromDDL("MAP<STRING, INT>")