or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

analysis.md, code-generation.md, data-types.md, expressions.md, index.md, optimization.md, parsing.md, query-plans.md, utilities.md

docs/data-types.md

0

# Data Types and Structures

1

2

This section covers the core data type system of Spark Catalyst, including primitive types, complex types (arrays, maps, structs), Row interface for data access, and encoders for type conversion.

3

4

## Core Imports

5

6

```scala

7

import org.apache.spark.sql.Row

8

import org.apache.spark.sql.types._

9

import org.apache.spark.sql.Encoder

10

import org.apache.spark.sql.Encoders

11

```

12

13

## Row Interface

14

15

The Row trait provides the primary interface for accessing structured data in Spark SQL.

16

17

### Row (trait)

18

19

```scala { .api }

20

trait Row {

21

def apply(i: Int): Any

22

def get(i: Int): Any

23

def isNullAt(i: Int): Boolean

24

def getInt(i: Int): Int

25

def getLong(i: Int): Long

26

def getFloat(i: Int): Float

27

def getDouble(i: Int): Double

28

def getString(i: Int): String

29

def getBoolean(i: Int): Boolean

30

def getByte(i: Int): Byte

31

def getShort(i: Int): Short

32

def getDecimal(i: Int): java.math.BigDecimal

33

def getDate(i: Int): java.sql.Date

34

def getTimestamp(i: Int): java.sql.Timestamp

35

def getSeq[T](i: Int): Seq[T]

36

def getList[T](i: Int): java.util.List[T]

37

def getMap[K, V](i: Int): scala.collection.Map[K, V]

38

def getJavaMap[K, V](i: Int): java.util.Map[K, V]

39

def getStruct(i: Int): Row

40

def getAs[T](i: Int): T

41

def getAs[T](fieldName: String): T

42

def length: Int

43

def size: Int

44

def schema: StructType

45

def copy(): Row

46

def toSeq: Seq[Any]

47

}

48

```

49

50

### Row (object)

51

52

Factory methods for creating Row instances.

53

54

```scala { .api }

55

object Row {

56

def unapplySeq(row: Row): Some[Seq[Any]]

57

def apply(values: Any*): Row

58

def fromSeq(values: Seq[Any]): Row

59

def fromTuple(tuple: Product): Row

60

def merge(rows: Row*): Row

61

def empty: Row

62

}

63

```

64

65

#### Usage Example

66

67

```scala

68

import org.apache.spark.sql.Row

69

70

// Create a row

71

val row = Row("Alice", 25, true)

72

73

// Access data by index

74

val name: String = row.getString(0)

75

val age: Int = row.getInt(1)

76

val isActive: Boolean = row.getBoolean(2)

77

78

// Generic access

79

val nameGeneric: String = row.getAs[String](0)

80

81

// Check for null values

82

if (!row.isNullAt(1)) {

83

val age = row.getInt(1)

84

}

85

```

86

87

## Data Type Hierarchy

88

89

### DataType (abstract class)

90

91

Base class for all Spark SQL data types.

92

93

```scala { .api }

94

abstract class DataType {

95

def defaultSize: Int

96

def typeName: String

97

def json: String

98

def prettyJson: String

99

def simpleString: String

100

def catalogString: String

101

def sql: String

102

def sameType(other: DataType): Boolean

103

def asNullable: DataType

104

def existsRecursively(f: (DataType) => Boolean): Boolean

105

}

106

```

107

108

### DataType (object)

109

110

Factory methods and utilities for DataType instances.

111

112

```scala { .api }

113

object DataType {

114

def fromJson(json: String): DataType

115

def fromDDL(ddl: String): DataType

116

def equalsIgnoreCompatibleNullability(from: DataType, to: DataType): Boolean

117

def equalsIgnoreNullability(from: DataType, to: DataType): Boolean

118

}

119

```

120

121

## Primitive Data Types

122

123

### Numeric Types

124

125

```scala { .api }

126

case object ByteType extends IntegralType

127

case object ShortType extends IntegralType

128

case object IntegerType extends IntegralType

129

case object LongType extends IntegralType

130

case object FloatType extends FractionalType

131

case object DoubleType extends FractionalType

132

133

case class DecimalType(precision: Int, scale: Int) extends FractionalType {

134

def this() = this(10, 0)

135

}

136

137

object DecimalType {

138

val USER_DEFAULT: DecimalType

139

val SYSTEM_DEFAULT: DecimalType

140

def apply(): DecimalType

141

def bounded(precision: Int, scale: Int): DecimalType

142

def unbounded: DecimalType

143

def unapply(t: DataType): Option[(Int, Int)]

144

}

145

```

146

147

### String and Binary Types

148

149

```scala { .api }

150

case object StringType extends AtomicType

151

case object BinaryType extends AtomicType

152

```

153

154

### Boolean Type

155

156

```scala { .api }

157

case object BooleanType extends AtomicType

158

```

159

160

### Date and Time Types

161

162

```scala { .api }

163

case object DateType extends AtomicType

164

case object TimestampType extends AtomicType

165

case object CalendarIntervalType extends DataType

166

```

167

168

### Null Type

169

170

```scala { .api }

171

case object NullType extends DataType

172

```

173

174

## Complex Data Types

175

176

### Array Type

177

178

```scala { .api }

179

case class ArrayType(elementType: DataType, containsNull: Boolean) extends DataType {

180

def this(elementType: DataType) = this(elementType, containsNull = true)

181

def buildFormattedString(prefix: String, buffer: StringBuffer): Unit

182

}

183

184

object ArrayType {

185

def apply(elementType: DataType): ArrayType

186

}

187

```

188

189

### Map Type

190

191

```scala { .api }

192

case class MapType(

193

keyType: DataType,

194

valueType: DataType,

195

valueContainsNull: Boolean

196

) extends DataType {

197

def this(keyType: DataType, valueType: DataType) =

198

this(keyType, valueType, valueContainsNull = true)

199

def buildFormattedString(prefix: String, buffer: StringBuffer): Unit

200

}

201

202

object MapType {

203

def apply(keyType: DataType, valueType: DataType): MapType

204

}

205

```

206

207

### Struct Type

208

209

```scala { .api }

210

case class StructType(fields: Array[StructField]) extends DataType {

211

def this(fields: Seq[StructField]) = this(fields.toArray)

212

def this(fields: java.util.List[StructField]) = this(fields.asScala.toArray)

213

214

def apply(name: String): StructField

215

def apply(names: Set[String]): StructType

216

def fieldNames: Array[String]

217

def names: Seq[String]

218

def length: Int

219

def iterator: Iterator[StructField]

220

def getFieldIndex(name: String): Option[Int]

221

def indexOf(name: String): Int

222

def add(field: StructField): StructType

223

def add(name: String, dataType: DataType): StructType

224

def add(name: String, dataType: DataType, nullable: Boolean): StructType

225

def add(name: String, dataType: DataType, nullable: Boolean, metadata: Metadata): StructType

226

def add(name: String, dataType: String): StructType

227

def add(name: String, dataType: String, nullable: Boolean): StructType

228

def add(name: String, dataType: String, nullable: Boolean, metadata: Metadata): StructType

229

}

230

231

object StructType {

232

def apply(fields: Seq[StructField]): StructType

233

def apply(fields: java.util.List[StructField]): StructType

234

def fromDDL(ddl: String): StructType

235

}

236

```

237

238

### Struct Field

239

240

```scala { .api }

241

case class StructField(

242

name: String,

243

dataType: DataType,

244

nullable: Boolean,

245

metadata: Metadata

246

) {

247

def this(name: String, dataType: DataType, nullable: Boolean) =

248

this(name, dataType, nullable, Metadata.empty)

249

def this(name: String, dataType: DataType) =

250

this(name, dataType, nullable = true, Metadata.empty)

251

252

def getComment(): Option[String]

253

def withComment(comment: String): StructField

254

}

255

256

object StructField {

257

def apply(name: String, dataType: DataType): StructField

258

def apply(name: String, dataType: DataType, nullable: Boolean): StructField

259

}

260

```

261

262

#### Usage Example

263

264

```scala

265

import org.apache.spark.sql.types._

266

267

// Create a struct type

268

val schema = StructType(Array(

269

StructField("name", StringType, nullable = false),

270

StructField("age", IntegerType, nullable = true),

271

StructField("scores", ArrayType(DoubleType), nullable = true),

272

StructField("metadata", MapType(StringType, StringType), nullable = true)

273

))

274

275

// Access field information

276

val nameField = schema("name")

277

val fieldNames = schema.fieldNames

278

val nameIndex = schema.indexOf("name")

279

```

280

281

## Encoders

282

283

Type-safe conversion between JVM objects and Spark SQL's internal representation.

284

285

### Encoder (trait)

286

287

```scala { .api }

288

trait Encoder[T] {

289

def schema: StructType

290

def clsTag: ClassTag[T]

291

}

292

```

293

294

### Encoders (object)

295

296

Factory methods for creating encoder instances.

297

298

```scala { .api }

299

object Encoders {

300

def BOOLEAN: Encoder[java.lang.Boolean]

301

def BYTE: Encoder[java.lang.Byte]

302

def SHORT: Encoder[java.lang.Short]

303

def INT: Encoder[java.lang.Integer]

304

def LONG: Encoder[java.lang.Long]

305

def FLOAT: Encoder[java.lang.Float]

306

def DOUBLE: Encoder[java.lang.Double]

307

def STRING: Encoder[java.lang.String]

308

def DECIMAL: Encoder[java.math.BigDecimal]

309

def DATE: Encoder[java.sql.Date]

310

def TIMESTAMP: Encoder[java.sql.Timestamp]

311

def BINARY: Encoder[Array[Byte]]

312

313

def bean[T](beanClass: Class[T]): Encoder[T]

314

def kryo[T: ClassTag]: Encoder[T]

315

def kryo[T](clazz: Class[T]): Encoder[T]

316

def javaSerialization[T: ClassTag]: Encoder[T]

317

def javaSerialization[T](clazz: Class[T]): Encoder[T]

318

319

def tuple[T1, T2](e1: Encoder[T1], e2: Encoder[T2]): Encoder[(T1, T2)]

320

def tuple[T1, T2, T3](e1: Encoder[T1], e2: Encoder[T2], e3: Encoder[T3]): Encoder[(T1, T2, T3)]

321

def tuple[T1, T2, T3, T4](e1: Encoder[T1], e2: Encoder[T2], e3: Encoder[T3], e4: Encoder[T4]): Encoder[(T1, T2, T3, T4)]

322

def tuple[T1, T2, T3, T4, T5](e1: Encoder[T1], e2: Encoder[T2], e3: Encoder[T3], e4: Encoder[T4], e5: Encoder[T5]): Encoder[(T1, T2, T3, T4, T5)]

323

324

def product[T <: Product : TypeTag]: Encoder[T]

325

326

def scalaInt: Encoder[Int]

327

def scalaLong: Encoder[Long]

328

def scalaDouble: Encoder[Double]

329

def scalaFloat: Encoder[Float]

330

def scalaByte: Encoder[Byte]

331

def scalaShort: Encoder[Short]

332

def scalaBoolean: Encoder[Boolean]

333

}

334

```

335

336

#### Usage Example

337

338

```scala

339

import org.apache.spark.sql.Encoders

340

341

// Primitive encoders

342

val stringEncoder = Encoders.STRING

343

val intEncoder = Encoders.scalaInt

344

345

// Bean encoder for Java objects

346

case class Person(name: String, age: Int)

347

val personEncoder = Encoders.product[Person]

348

349

// Kryo encoder for complex objects

350

val kryoEncoder = Encoders.kryo[MyComplexClass]

351

```

352

353

## Metadata

354

355

Associated metadata for struct fields.

356

357

```scala { .api }

358

case class Metadata(map: Map[String, Any]) {

359

def contains(key: String): Boolean

360

def getLong(key: String): Long

361

def getDouble(key: String): Double

362

def getBoolean(key: String): Boolean

363

def getString(key: String): String

364

def getMetadata(key: String): Metadata

365

def getLongArray(key: String): Array[Long]

366

def getDoubleArray(key: String): Array[Double]

367

def getBooleanArray(key: String): Array[Boolean]

368

def getStringArray(key: String): Array[String]

369

def getMetadataArray(key: String): Array[Metadata]

370

def json: String

371

}

372

373

object Metadata {

374

def empty: Metadata

375

def fromJson(json: String): Metadata

376

}

377

```

378

379

## Exception Types

380

381

### AnalysisException

382

383

Exception thrown when query analysis fails.

384

385

```scala { .api }

386

class AnalysisException(

387

message: String,

388

line: Option[Int] = None,

389

startPosition: Option[Int] = None,

390

plan: Option[LogicalPlan] = None,

391

cause: Option[Throwable] = None

392

) extends Exception {

393

def withPosition(line: Option[Int], startPosition: Option[Int]): AnalysisException

394

override def getMessage: String

395

def getSimpleMessage: String

396

}

397

```