Catalyst is a library for manipulating relational query plans. It serves as the foundation for Spark SQL's query optimizer and execution engine.
Catalyst provides a comprehensive type system for representing data schemas, covering all SQL types: primitives, complex nested types, and user-defined types.
The abstract base class for all Spark SQL data types.
/**
* The base type of all Spark SQL data types.
*/
abstract class DataType extends AbstractDataType {
/**
* The default size of a value of this data type, used internally for size estimation.
*/
def defaultSize: Int
/** Name of the type used in JSON serialization */
def typeName: String
/** The compact JSON representation of this data type */
def json: String
/** The pretty (i.e. indented) JSON representation of this data type */
def prettyJson: String
/** Readable string representation for the type */
def simpleString: String
/**
* Check if `this` and `other` are the same data type when ignoring nullability
* (`StructField.nullable`, `ArrayType.containsNull`, and `MapType.valueContainsNull`).
*/
def sameType(other: DataType): Boolean
/**
* Returns the same data type but with all nullability fields set to true
* (`StructField.nullable`, `ArrayType.containsNull`, and `MapType.valueContainsNull`).
*/
def asNullable: DataType
}

Usage Examples:
import org.apache.spark.sql.types._
// Get type information
val intType = IntegerType
val typeName = intType.typeName // "integer"
val json = intType.json // "integer"
val prettyJson = intType.prettyJson // "integer"
val simple = intType.simpleString // "int"
val size = intType.defaultSize // 4
// Compare types
val stringType = StringType
val same = intType.sameType(stringType) // false
val sameInt = intType.sameType(IntegerType) // true
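// Illustrative: sameType also ignores nullability nested inside complex types
val arrA = ArrayType(IntegerType, containsNull = true)
val arrB = ArrayType(IntegerType, containsNull = false)
val nestedSame = arrA.sameType(arrB) // true, containsNull is ignored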
// Create nullable version
val strictArray = ArrayType(IntegerType, containsNull = false)
val nullable = strictArray.asNullable
// ArrayType(IntegerType, containsNull = true)

Basic data types representing primitive values.
/** Boolean data type */
object BooleanType extends DataType {
def defaultSize: Int = 1
}
/** Byte data type */
object ByteType extends DataType {
def defaultSize: Int = 1
}
/** Short integer data type */
object ShortType extends DataType {
def defaultSize: Int = 2
}
/** Integer data type */
object IntegerType extends DataType {
def defaultSize: Int = 4
}
/** Long integer data type */
object LongType extends DataType {
def defaultSize: Int = 8
}
/** Float data type */
object FloatType extends DataType {
def defaultSize: Int = 4
}
/** Double data type */
object DoubleType extends DataType {
def defaultSize: Int = 8
}
/** String data type */
object StringType extends DataType {
def defaultSize: Int = 20
}
/** Binary data type */
object BinaryType extends DataType {
def defaultSize: Int = 100
}
/** Date data type */
object DateType extends DataType {
def defaultSize: Int = 4
}
/** Timestamp data type */
object TimestampType extends DataType {
def defaultSize: Int = 12
}
/** Null data type */
object NullType extends DataType {
def defaultSize: Int = 1
}
/** Calendar interval data type */
object CalendarIntervalType extends DataType {
def defaultSize: Int = 16
}

Usage Examples:
import org.apache.spark.sql.types._
// Use primitive types
val schema = StructType(Seq(
StructField("id", IntegerType, nullable = false),
StructField("name", StringType, nullable = true),
StructField("active", BooleanType, nullable = false),
StructField("score", DoubleType, nullable = true),
StructField("created", TimestampType, nullable = false)
))
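// Illustrative: StructType sums its fields' defaultSize values, so a rough
// per-row size estimate follows from the sizes listed above (4 + 20 + 1 + 8 + 12)
val estimatedRowSize = schema.defaultSize // 45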
// Type checking
def processValue(value: Any, dataType: DataType): String = dataType match {
case IntegerType => s"Integer: $value"
case StringType => s"String: $value"
case BooleanType => s"Boolean: $value"
case DoubleType => s"Double: $value"
case _ => s"Other: $value"
}

Data types for complex nested structures including arrays, maps, and structs.
/**
* Array data type with element type and nullability
*/
case class ArrayType(elementType: DataType, containsNull: Boolean) extends DataType {
def defaultSize: Int = 100
/** Creates a new ArrayType with updated parameters */
def copy(elementType: DataType = elementType, containsNull: Boolean = containsNull): ArrayType
}
/**
* Map data type with key/value types and nullability
*/
case class MapType(
keyType: DataType,
valueType: DataType,
valueContainsNull: Boolean) extends DataType {
def defaultSize: Int = 100
/** Creates a new MapType with updated parameters */
def copy(
keyType: DataType = keyType,
valueType: DataType = valueType,
valueContainsNull: Boolean = valueContainsNull): MapType
}
/**
* Struct data type with named fields
*/
case class StructType(fields: Array[StructField]) extends DataType {
def defaultSize: Int = fields.map(_.dataType.defaultSize).sum
/** Get field by name */
def apply(name: String): StructField
/** Get field names */
def fieldNames: Array[String]
/** Add a new field */
def add(field: StructField): StructType
def add(name: String, dataType: DataType): StructType
def add(name: String, dataType: DataType, nullable: Boolean): StructType
def add(name: String, dataType: DataType, nullable: Boolean, metadata: Metadata): StructType
}
/**
* Individual field in a struct with name, type, nullable flag, and metadata
*/
case class StructField(
name: String,
dataType: DataType,
nullable: Boolean,
metadata: Metadata = Metadata.empty) {
/** Creates a new StructField with updated parameters */
def copy(
name: String = name,
dataType: DataType = dataType,
nullable: Boolean = nullable,
metadata: Metadata = metadata): StructField
}

Usage Examples:
import org.apache.spark.sql.types._
// Create array type
val intArray = ArrayType(IntegerType, containsNull = true)
val stringArray = ArrayType(StringType, containsNull = false)
// Create map type
val stringToInt = MapType(StringType, IntegerType, valueContainsNull = false)
val idToUser = MapType(IntegerType, StringType, valueContainsNull = true)
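// Illustrative: copy() derives a new type with updated nullability
val strictIdToUser = idToUser.copy(valueContainsNull = false)
// MapType(IntegerType, StringType, valueContainsNull = false)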
// Create struct type
val userSchema = StructType(Array(
StructField("id", IntegerType, nullable = false),
StructField("name", StringType, nullable = true),
StructField("emails", ArrayType(StringType, containsNull = false), nullable = true),
StructField("properties", MapType(StringType, StringType, valueContainsNull = true), nullable = true)
))
// Access struct fields
val nameField = userSchema("name")
val fieldNames = userSchema.fieldNames // Array("id", "name", "emails", "properties")
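// Illustrative: the DataType JSON methods shown earlier also work on whole schemas
val schemaJson = userSchema.json           // compact JSON for the struct
val schemaPretty = userSchema.prettyJson   // indented JSON, handy for debugging
val nameType = userSchema("name").dataType // StringType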
// Add fields to struct
val extendedSchema = userSchema
.add("created_at", TimestampType, nullable = false)
.add("active", BooleanType, nullable = false)
// Complex nested types
val nestedSchema = StructType(Array(
StructField("users", ArrayType(userSchema, containsNull = false), nullable = true),
StructField("metadata", MapType(StringType,
StructType(Array(
StructField("value", StringType, nullable = true),
StructField("type", StringType, nullable = false)
)), valueContainsNull = true), nullable = true)
))

High-precision decimal types for financial and scientific calculations.
/**
* Decimal data type with precision and scale
*/
case class DecimalType(precision: Int, scale: Int) extends DataType {
def defaultSize: Int = 8
/** Whether this decimal type is wider than another */
def isWiderThan(other: DataType): Boolean
/** Whether this decimal type is tighter than another */
def isTighterThan(other: DataType): Boolean
}
object DecimalType {
/** System default decimal type */
val SYSTEM_DEFAULT: DecimalType
/** User default decimal type */
val USER_DEFAULT: DecimalType
/** Maximum precision allowed */
val MAX_PRECISION: Int = 38
/** Maximum scale allowed */
val MAX_SCALE: Int = 38
/** Create decimal type with precision and scale */
def apply(precision: Int, scale: Int): DecimalType
/** Create unbounded decimal type */
def UNLIMITED(): DecimalType
}
/**
* Decimal value implementation with high precision arithmetic
*/
case class Decimal(value: java.math.BigDecimal) extends Ordered[Decimal] {
/** Convert to different precision and scale */
def toPrecision(precision: Int, scale: Int): Decimal
/** Basic arithmetic operations */
def +(that: Decimal): Decimal
def -(that: Decimal): Decimal
def *(that: Decimal): Decimal
def /(that: Decimal): Decimal
def %(that: Decimal): Decimal
/** Comparison operations */
def compare(that: Decimal): Int
/** Convert to other numeric types */
def toDouble: Double
def toFloat: Float
def toLong: Long
def toInt: Int
/** String representation */
override def toString: String
}
object Decimal {
/** Create decimal from various sources */
def apply(value: BigDecimal): Decimal
def apply(value: java.math.BigDecimal): Decimal
def apply(value: String): Decimal
def apply(value: Long): Decimal
def apply(value: Double): Decimal
/** Zero and One constants */
val ZERO: Decimal
val ONE: Decimal
}

Usage Examples:
import org.apache.spark.sql.types._
// Create decimal types
val currency = DecimalType(10, 2) // 10 digits, 2 decimal places
val percentage = DecimalType(5, 4) // 5 digits, 4 decimal places
val unlimited = DecimalType.UNLIMITED()
// Create decimal values
val price = Decimal("123.45")
val rate = Decimal(0.0825)
val count = Decimal(1000)
// Arithmetic operations
val total = price * count // Decimal("123450.00")
val withTax = total * (Decimal.ONE + rate) // Apply tax rate
val rounded = withTax.toPrecision(10, 2) // Round to currency precision
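// Illustrative: comparisons and numeric conversions on Decimal values
val cheaper = price.compare(total) < 0 // true, 123.45 < 123450.00
val asDouble = total.toDouble          // 123450.0
val zero = Decimal.ZERO                // constant provided by the companion object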
// Use in schema
val transactionSchema = StructType(Array(
StructField("id", StringType, nullable = false),
StructField("amount", DecimalType(12, 2), nullable = false),
StructField("tax_rate", DecimalType(6, 4), nullable = false),
StructField("total", DecimalType(15, 2), nullable = false)
))

Specialized types for advanced use cases including user-defined types and abstract matching.
/**
* Base class for user-defined types
*/
abstract class UserDefinedType[UserType >: Null] extends DataType {
/** The underlying SQL data type for this UDT */
def sqlType: DataType
/** Serialize the user type to SQL type */
def serialize(obj: UserType): Any
/** Deserialize from SQL type to user type */
def deserialize(datum: Any): UserType
/** User class that this UDT represents */
def userClass: Class[UserType]
}
/**
* Abstract base for type matching in expressions
*/
abstract class AbstractDataType {
/** The default concrete type to use if we need to cast into this type */
def defaultConcreteType: DataType
/** Check if the given data type is an instance of this abstract type */
def acceptsType(other: DataType): Boolean
/** Readable string representation */
def simpleString: String
}
/**
* Type for JVM objects
*/
case class ObjectType(cls: Class[_]) extends DataType {
def defaultSize: Int = 4096
}

Usage Examples:
import org.apache.spark.sql.types._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
// Define a custom UDT for a Point class
case class Point(x: Double, y: Double)
class PointUDT extends UserDefinedType[Point] {
override def sqlType: DataType = StructType(Array(
StructField("x", DoubleType, nullable = false),
StructField("y", DoubleType, nullable = false)
))
override def serialize(point: Point): Any = {
val row = new GenericInternalRow(2)
row.setDouble(0, point.x)
row.setDouble(1, point.y)
row
}
override def deserialize(datum: Any): Point = {
datum match {
case row: InternalRow =>
Point(row.getDouble(0), row.getDouble(1))
}
}
override def userClass: Class[Point] = classOf[Point]
}
// Use custom UDT in schema
val pointUDT = new PointUDT()
val locationSchema = StructType(Array(
StructField("name", StringType, nullable = false),
StructField("location", pointUDT, nullable = false)
))
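// Illustrative: values round-trip through the UDT's serialize/deserialize pair
val serialized = pointUDT.serialize(Point(1.0, 2.0)) // InternalRow holding (1.0, 2.0)
val restored = pointUDT.deserialize(serialized)      // Point(1.0, 2.0)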
// Object type for arbitrary JVM objects
val stringObjectType = ObjectType(classOf[String])
val listObjectType = ObjectType(classOf[java.util.List[_]])

Utility objects and methods for working with the type system.
object DataType {
/**
* Compares two types, ignoring nullability of ArrayType, MapType, StructType.
*/
def equalsIgnoreNullability(left: DataType, right: DataType): Boolean
/**
* Compares two types, ignoring compatible nullability (the right-hand type may be nullable where the left-hand type is not).
*/
def equalsIgnoreCompatibleNullability(left: DataType, right: DataType): Boolean
/** Parse a data type from its JSON representation */
def fromJson(json: String): DataType
/** Parse a data type from DDL string */
def fromDDL(ddl: String): DataType
}
/** Metadata for struct fields and other schema elements */
case class Metadata(map: Map[String, Any]) {
def contains(key: String): Boolean
def getString(key: String): String
def getLong(key: String): Long
def getDouble(key: String): Double
def getBoolean(key: String): Boolean
def getMetadata(key: String): Metadata
def getStringArray(key: String): Array[String]
def getLongArray(key: String): Array[Long]
def getDoubleArray(key: String): Array[Double]
def getBooleanArray(key: String): Array[Boolean]
def getMetadataArray(key: String): Array[Metadata]
def json: String
}
object Metadata {
/** Empty metadata */
val empty: Metadata
/** Create metadata from JSON */
def fromJson(json: String): Metadata
/** Create metadata builder */
def builder(): MetadataBuilder
}

Usage Examples:
import org.apache.spark.sql.types._
// Compare types ignoring nullability
val nullable = StructType(Array(StructField("x", IntegerType, nullable = true)))
val nonNullable = StructType(Array(StructField("x", IntegerType, nullable = false)))
val equal = DataType.equalsIgnoreNullability(nullable, nonNullable) // true
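// Illustrative: compatible nullability is directional; a non-nullable source
// fits a nullable target, but not the reverse
val writeOk = DataType.equalsIgnoreCompatibleNullability(nonNullable, nullable)    // true
val writeNotOk = DataType.equalsIgnoreCompatibleNullability(nullable, nonNullable) // false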
// Parse types from strings
val parsedType = DataType.fromDDL("struct<id:int,name:string>")
val jsonType = DataType.fromJson("""{"type":"integer"}""")
// Work with metadata
val metadata = Metadata.builder()
.putString("description", "User identifier")
.putLong("version", 1L)
.putBoolean("required", true)
.build()
val fieldWithMetadata = StructField("id", IntegerType, nullable = false, metadata)
// Access metadata
val description = fieldWithMetadata.metadata.getString("description")
val version = fieldWithMetadata.metadata.getLong("version")
val required = fieldWithMetadata.metadata.getBoolean("required")

Install with Tessl CLI
npx tessl i tessl/maven-org-apache-spark--spark-catalyst-2-10