# Utilities

This section covers utility classes for date/time operations, string manipulation, and other common operations in Spark Catalyst.

## Core Imports

```scala
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.sql.types._
```

## Date and Time Utilities

### DateTimeUtils

Comprehensive utilities for date and time operations.

```scala { .api }
object DateTimeUtils {
  // String conversion
  def stringToTimestamp(s: UTF8String): Option[Long]
  def timestampToString(us: Long): String
  def dateToString(days: Int): String
  def stringToDate(s: UTF8String): Option[Int]

  // Current time functions
  def currentTimestamp(): Long
  def currentDate(): Int

  // Date arithmetic
  def dateAddMonths(days: Int, months: Int): Int
  def dateAddInterval(days: Int, interval: CalendarInterval): Int
  def timestampAddInterval(timestamp: Long, interval: CalendarInterval): Long

  // Date/time extraction
  def getYear(days: Int): Int
  def getMonth(days: Int): Int
  def getDayOfMonth(days: Int): Int
  def getDayOfYear(days: Int): Int
  def getWeekOfYear(days: Int): Int
  def getHour(timestamp: Long): Int
  def getMinute(timestamp: Long): Int
  def getSecond(timestamp: Long): Int

  // Formatting
  def formatTimestamp(timestamp: Long, format: String): String
  def parseTimestamp(s: String, format: String): Option[Long]

  // Constants
  val MICROS_PER_SECOND: Long = 1000000L
  val MICROS_PER_MILLIS: Long = 1000L
  val MILLIS_PER_SECOND: Long = 1000L
  val SECONDS_PER_DAY: Long = 24 * 60 * 60
  val MICROS_PER_DAY: Long = SECONDS_PER_DAY * MICROS_PER_SECOND
}
```

### Usage Example

```scala
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.unsafe.types.UTF8String

// Current date and time
val currentTs = DateTimeUtils.currentTimestamp()
val currentDate = DateTimeUtils.currentDate()

// String conversions (dates are represented as days since the Unix epoch)
val dateString = UTF8String.fromString("2023-12-25")
val date = DateTimeUtils.stringToDate(dateString) // Some(19716)
val dateStr = DateTimeUtils.dateToString(19716) // "2023-12-25"

// Time extraction
val year = DateTimeUtils.getYear(19716) // 2023
val month = DateTimeUtils.getMonth(19716) // 12
val day = DateTimeUtils.getDayOfMonth(19716) // 25

// Timestamp operations (timestamps are microseconds since the Unix epoch)
val timestampString = UTF8String.fromString("2023-12-25 14:30:00")
val timestamp = DateTimeUtils.stringToTimestamp(timestampString)
val hour = DateTimeUtils.getHour(timestamp.get) // 14
val minute = DateTimeUtils.getMinute(timestamp.get) // 30
```

## String Utilities

### UTF8String

Efficient UTF-8 string representation optimized for Spark SQL.

```scala { .api }
abstract class UTF8String extends Comparable[UTF8String] {
  def numBytes(): Int
  def numChars(): Int
  def toString(): String
  def getBytes(): Array[Byte]

  // String operations
  def contains(substring: UTF8String): Boolean
  def startsWith(prefix: UTF8String): Boolean
  def endsWith(suffix: UTF8String): Boolean
  def indexOf(substring: UTF8String, start: Int): Int
  def substring(start: Int): UTF8String
  def substring(start: Int, until: Int): UTF8String
  def trim(): UTF8String
  def trimLeft(): UTF8String
  def trimRight(): UTF8String
  def reverse(): UTF8String
  def repeat(times: Int): UTF8String

  // Case operations
  def toUpperCase(): UTF8String
  def toLowerCase(): UTF8String

  // Comparison
  def compare(other: UTF8String): Int
  def equals(other: Any): Boolean
  def hashCode(): Int

  // Conversion
  def toLong(): Long
  def toDouble(): Double
  def toInt(): Int
}

object UTF8String {
  def fromString(str: String): UTF8String
  def fromBytes(bytes: Array[Byte]): UTF8String
  def fromBytes(bytes: Array[Byte], offset: Int, numBytes: Int): UTF8String

  val EMPTY_UTF8: UTF8String

  // String manipulation utilities
  def concat(inputs: UTF8String*): UTF8String
  def concatWs(separator: UTF8String, inputs: UTF8String*): UTF8String
}
```

### Usage Example

```scala
import org.apache.spark.unsafe.types.UTF8String

// Create UTF8String instances
val str1 = UTF8String.fromString("Hello World")
val str2 = UTF8String.fromString("hello")

// String operations
val upper = str2.toUpperCase() // "HELLO"
val substring = str1.substring(0, 5) // "Hello"
val contains = str1.contains(UTF8String.fromString("World")) // true

// Concatenation
val concat = UTF8String.concat(str2, UTF8String.fromString(" "), str1)
val concatWs = UTF8String.concatWs(UTF8String.fromString("-"), str1, str2)

// Comparison (byte-wise, hence case sensitive: 'H' (0x48) sorts before 'h' (0x68))
val comparison = str1.compare(str2) // < 0
```

## Array Utilities

### ArrayData

Abstract representation for array data in Catalyst.

```scala { .api }
abstract class ArrayData {
  def numElements(): Int
  def isNullAt(ordinal: Int): Boolean
  def get(ordinal: Int, elementType: DataType): Any
  def getBoolean(ordinal: Int): Boolean
  def getByte(ordinal: Int): Byte
  def getShort(ordinal: Int): Short
  def getInt(ordinal: Int): Int
  def getLong(ordinal: Int): Long
  def getFloat(ordinal: Int): Float
  def getDouble(ordinal: Int): Double
  def getDecimal(ordinal: Int, precision: Int, scale: Int): Decimal
  def getUTF8String(ordinal: Int): UTF8String
  def getBinary(ordinal: Int): Array[Byte]
  def getInterval(ordinal: Int): CalendarInterval
  def getStruct(ordinal: Int, numFields: Int): InternalRow
  def getArray(ordinal: Int): ArrayData
  def getMap(ordinal: Int): MapData

  def toArray[T](elementType: DataType): Array[T]
  def toObjectArray(elementType: DataType): Array[AnyRef]
  def copy(): ArrayData
}
```

## Map Utilities

### MapData

Abstract representation for map data in Catalyst.

```scala { .api }
abstract class MapData {
  def numElements(): Int
  def keyArray(): ArrayData
  def valueArray(): ArrayData
  def copy(): MapData

  // Java and Scala map conversions
  def toMap[K, V](keyType: DataType, valueType: DataType): java.util.Map[K, V]
  def toScalaMap[K, V](keyType: DataType, valueType: DataType): scala.collection.Map[K, V]
}
```

## Interval Utilities

### CalendarInterval

Represents a calendar interval (years, months, days, microseconds).

```scala { .api }
case class CalendarInterval(months: Int, days: Int, microseconds: Long) {
  def add(that: CalendarInterval): CalendarInterval
  def subtract(that: CalendarInterval): CalendarInterval
  def negate(): CalendarInterval
  def toString: String
}

object CalendarInterval {
  val EMPTY: CalendarInterval = new CalendarInterval(0, 0, 0)

  def fromString(input: String): CalendarInterval
  def fromSingleUnitString(unit: String, input: String): CalendarInterval

  // Factory methods
  def fromYearMonthString(input: String): CalendarInterval
  def fromDayTimeString(input: String): CalendarInterval
}
```

### Usage Example

```scala
import org.apache.spark.unsafe.types.CalendarInterval

// Create intervals
val interval1 = new CalendarInterval(2, 15, 3600000000L) // 2 months, 15 days, 1 hour
val interval2 = CalendarInterval.fromString("1 year 2 months 3 days")

// Interval arithmetic
val sum = interval1.add(interval2)
val diff = interval1.subtract(interval2)
val negated = interval1.negate()
```

## Mathematical Utilities

### MathUtils

```scala { .api }
object MathUtils {
  def floorDiv(x: Long, y: Long): Long
  def floorMod(x: Long, y: Long): Long
  def addExact(x: Long, y: Long): Long
  def subtractExact(x: Long, y: Long): Long
  def multiplyExact(x: Long, y: Long): Long
  def toIntExact(value: Long): Int

  // Rounding functions
  def round(value: Double, scale: Int): Double
  def roundUp(value: Double, scale: Int): Double
  def roundDown(value: Double, scale: Int): Double
}
```

## Collection Utilities

### AttributeMap

Specialized map for attributes with efficient lookups.

```scala { .api }
class AttributeMap[A](val baseMap: Map[Attribute, A]) {
  def get(k: Attribute): Option[A]
  def apply(k: Attribute): A
  def contains(k: Attribute): Boolean
  def size: Int
  def isEmpty: Boolean
  def nonEmpty: Boolean

  def ++(other: AttributeMap[A]): AttributeMap[A]
  def -(key: Attribute): AttributeMap[A]
  def updated(key: Attribute, value: A): AttributeMap[A]

  def keys: Iterable[Attribute]
  def values: Iterable[A]
  def foreach[U](f: ((Attribute, A)) => U): Unit
  def map[B](f: ((Attribute, A)) => (Attribute, B)): AttributeMap[B]
}

object AttributeMap {
  def empty[A]: AttributeMap[A] = new AttributeMap(Map.empty)
  def apply[A](kvs: (Attribute, A)*): AttributeMap[A] = new AttributeMap(kvs.toMap)
}
```

### AttributeSet

Efficient set implementation for attributes.

```scala { .api }
class AttributeSet private (val baseSet: Set[Attribute]) {
  def contains(a: Attribute): Boolean
  def subsetOf(other: AttributeSet): Boolean
  def intersect(other: AttributeSet): AttributeSet
  def ++(other: AttributeSet): AttributeSet
  def +(attr: Attribute): AttributeSet
  def -(attr: Attribute): AttributeSet
  def filter(f: Attribute => Boolean): AttributeSet
  def map(f: Attribute => Attribute): AttributeSet
  def size: Int
  def isEmpty: Boolean
  def nonEmpty: Boolean
  def toSeq: Seq[Attribute]
}

object AttributeSet {
  def empty: AttributeSet = new AttributeSet(Set.empty)
  def apply(attrs: Attribute*): AttributeSet = new AttributeSet(attrs.toSet)
  def fromAttributeSets(sets: Seq[AttributeSet]): AttributeSet
}
```

## Hashing Utilities

### HashUtils

```scala { .api }
object HashUtils {
  def murmur3Hash(input: Any, dataType: DataType, seed: Int): Int
  def xxHash64(input: Any, dataType: DataType, seed: Long): Long

  // Hash complex types element by element
  def hashArray(array: ArrayData, elementType: DataType, seed: Int): Int
  def hashMap(map: MapData, keyType: DataType, valueType: DataType, seed: Int): Int
  def hashStruct(struct: InternalRow, structType: StructType, seed: Int): Int
}
```

## Complete Usage Example

```scala
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.sql.catalyst.InternalRow

// Working with dates and times
val dateStr = UTF8String.fromString("2023-12-25")
val date = DateTimeUtils.stringToDate(dateStr).get
val year = DateTimeUtils.getYear(date)
val formattedDate = DateTimeUtils.dateToString(date)

// String processing
val name = UTF8String.fromString("John Doe")
val upperName = name.toUpperCase()
val firstName = name.substring(0, 4)

// Interval operations
val interval = new CalendarInterval(1, 0, 0) // 1 month
val futureDate = DateTimeUtils.dateAddInterval(date, interval)

// Working with collections
// (assumes attr1, attr2, attr3 are previously defined Attribute instances)
val attrs = Seq(attr1, attr2, attr3)
val attrSet = AttributeSet(attrs: _*)
val attrMap = AttributeMap(attrs.zip(Seq("value1", "value2", "value3")): _*)

println(s"Date: $formattedDate, Year: $year")
println(s"Name: $upperName, First: $firstName")
println(s"Attribute set size: ${attrSet.size}")
```

These utilities provide the foundation for efficient data processing operations throughout the Catalyst framework, enabling high-performance query execution with proper handling of various data types and operations.