or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration-utilities.mdfile-formats.mdhive-client.mdindex.mdmetastore-integration.mdquery-execution.mdsession-configuration.mdudf-support.md

configuration-utilities.mddocs/

# Configuration Utilities

Comprehensive configuration constants and utilities for Hive integration behavior, metastore connection, and performance optimization.

## Core Imports

```scala
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.internal.config.ConfigEntry
import org.apache.spark.sql.internal.SQLConf
```

## Capabilities

### HiveUtils Configuration Constants

Core configuration constants for controlling Hive integration behavior.

```scala { .api }
object HiveUtils {

  /**
   * Version of Hive metastore to connect to
   * Default: Built-in Hive version bundled with Spark
   */
  val HIVE_METASTORE_VERSION: ConfigEntry[String]

  /**
   * Strategy for loading Hive metastore jars
   * Options: "builtin", "maven", "path"
   * Default: "builtin"
   */
  val HIVE_METASTORE_JARS: ConfigEntry[String]

  /**
   * Classpath locations when using "path" strategy for metastore jars
   */
  val HIVE_METASTORE_JARS_PATH: ConfigEntry[Seq[String]]

  /**
   * Class prefixes shared between Spark and Hive metastore classloaders
   */
  val HIVE_METASTORE_SHARED_PREFIXES: ConfigEntry[Seq[String]]

  /**
   * Class prefixes isolated in Hive metastore classloader
   */
  val HIVE_METASTORE_BARRIER_PREFIXES: ConfigEntry[Seq[String]]
}
```

### Format Conversion Configuration

Control automatic conversion of Hive tables to Spark-native formats for better performance.

```scala { .api }
object HiveUtils {
  /**
   * Convert Hive Parquet tables to use Spark's native Parquet reader
   * Provides better performance and feature support
   * Default: true
   */
  val CONVERT_METASTORE_PARQUET: ConfigEntry[Boolean]

  /**
   * Convert Hive ORC tables to use Spark's native ORC reader
   * Enables vectorized reading and better performance
   * Default: true
   */
  val CONVERT_METASTORE_ORC: ConfigEntry[Boolean]

  /**
   * Convert CREATE TABLE AS SELECT operations to use data source API
   * Improves performance for table creation from queries
   * Default: true
   */
  val CONVERT_METASTORE_CTAS: ConfigEntry[Boolean]

  /**
   * Convert partitioned table inserts to use data source API
   * Enables better partition handling and performance
   * Default: true
   */
  val CONVERT_INSERTING_PARTITIONED_TABLE: ConfigEntry[Boolean]

  /**
   * Convert INSERT DIRECTORY operations to use data source API
   * Default: true
   */
  val CONVERT_METASTORE_INSERT_DIR: ConfigEntry[Boolean]
}
```

### Thrift Server Configuration

Configuration for Hive Thrift Server integration.

```scala { .api }
object HiveUtils {
  /**
   * Enable asynchronous execution in Hive Thrift Server
   * Improves concurrency and resource utilization
   * Default: true
   */
  val HIVE_THRIFT_SERVER_ASYNC: ConfigEntry[Boolean]

  /**
   * Authentication type for Hive Thrift Server
   * Options: "NONE", "LDAP", "KERBEROS", "CUSTOM"
   */
  val HIVE_THRIFT_SERVER_AUTHENTICATION: ConfigEntry[String]

  /**
   * Principal for Kerberos authentication
   */
  val HIVE_THRIFT_SERVER_KERBEROS_PRINCIPAL: ConfigEntry[String]

  /**
   * Keytab file location for Kerberos authentication
   */
  val HIVE_THRIFT_SERVER_KERBEROS_KEYTAB: ConfigEntry[String]
}
```

### Client Configuration Utilities

Utilities for configuring Hive client connections and behavior.

```scala { .api }
object HiveUtils {
  /**
   * Create Hive client configuration from Spark configuration
   * @param sparkConf Spark configuration
   * @return Map of Hive client configuration properties
   */
  def formatHiveConfigs(sparkConf: SparkConf): Map[String, String]

  /**
   * Get default warehouse directory for Hive tables
   * @param conf Spark configuration
   * @return Warehouse directory path
   */
  def hiveWarehouseLocation(conf: SparkConf): String

  /**
   * Check if Hive support is available and properly configured
   * @param sparkConf Spark configuration
   * @return true if Hive support can be enabled
   */
  def isHiveAvailable(sparkConf: SparkConf): Boolean

  /**
   * Get Hive version from configuration or detect from classpath
   * @param sparkConf Spark configuration
   * @return HiveVersion instance
   */
  def hiveVersion(sparkConf: SparkConf): HiveVersion

  /**
   * Create isolated classloader for Hive client
   * @param version Hive version to load
   * @param sparkConf Spark configuration
   * @param hadoopConf Hadoop configuration
   * @return Isolated classloader for Hive
   */
  def createHiveClassLoader(
    version: HiveVersion,
    sparkConf: SparkConf,
    hadoopConf: Configuration
  ): ClassLoader
}
```

### Metastore Connection Utilities

Utilities for managing Hive metastore connections and lifecycle.

```scala { .api }
object HiveUtils {
  /**
   * Initialize Hive metastore client with proper configuration
   * @param sparkConf Spark configuration
   * @param hadoopConf Hadoop configuration
   * @return Configured metastore client
   */
  def newClientForMetadata(
    sparkConf: SparkConf,
    hadoopConf: Configuration
  ): HiveClient

  /**
   * Initialize Hive client for execution with isolation
   * @param sparkConf Spark configuration
   * @param hadoopConf Hadoop configuration
   * @return Isolated Hive client for execution
   */
  def newClientForExecution(
    sparkConf: SparkConf,
    hadoopConf: Configuration
  ): HiveClient

  /**
   * Close Hive client and clean up resources
   * @param client Hive client to close
   */
  def closeHiveClient(client: HiveClient): Unit
}
```

## Configuration Usage Examples

### Basic Hive Configuration

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("Hive Configuration Example")
  .config("spark.sql.catalogImplementation", "hive")
  // Metastore version configuration
  .config("spark.sql.hive.metastore.version", "2.3.9")
  .config("spark.sql.hive.metastore.jars", "builtin")
  // Performance optimizations
  .config("spark.sql.hive.convertMetastoreParquet", "true")
  .config("spark.sql.hive.convertMetastoreOrc", "true")
  .config("spark.sql.hive.convertInsertingPartitionedTable", "true")
  .enableHiveSupport()
  .getOrCreate()
```

### External Hive Metastore Configuration

```scala
// Connect to external Hive metastore
val spark = SparkSession.builder()
  .appName("External Hive Metastore")
  .config("spark.sql.catalogImplementation", "hive")
  // External metastore configuration
  .config("spark.sql.hive.metastore.version", "3.1.2")
  .config("spark.sql.hive.metastore.jars", "maven")
  .config("hive.metastore.uris", "thrift://metastore-host:9083")
  // Security configuration
  .config("hive.metastore.sasl.enabled", "true")
  .config("hive.metastore.kerberos.principal", "hive/_HOST@REALM.COM")
  .enableHiveSupport()
  .getOrCreate()
```

### Custom Jar Path Configuration

```scala
// Use custom Hive jars from specific path
val customHiveJars = Seq(
  "/opt/hive/lib/hive-metastore-3.1.2.jar",
  "/opt/hive/lib/hive-exec-3.1.2.jar",
  "/opt/hive/lib/hive-common-3.1.2.jar"
)

val spark = SparkSession.builder()
  .appName("Custom Hive Jars")
  .config("spark.sql.catalogImplementation", "hive")
  .config("spark.sql.hive.metastore.version", "3.1.2")
  .config("spark.sql.hive.metastore.jars", "path")
  .config("spark.sql.hive.metastore.jars.path", customHiveJars.mkString(","))
  .enableHiveSupport()
  .getOrCreate()
```

### Performance Tuning Configuration

```scala
// Optimize for performance with native format conversion
spark.conf.set("spark.sql.hive.convertMetastoreParquet", "true")
spark.conf.set("spark.sql.hive.convertMetastoreOrc", "true")
spark.conf.set("spark.sql.hive.convertInsertingPartitionedTable", "true")

// ORC-specific optimizations
spark.conf.set("spark.sql.orc.impl", "native")
spark.conf.set("spark.sql.orc.enableVectorizedReader", "true")
spark.conf.set("spark.sql.orc.columnarReaderBatchSize", "4096")

// Parquet optimizations
spark.conf.set("spark.sql.parquet.enableVectorizedReader", "true")
spark.conf.set("spark.sql.parquet.columnarReaderBatchSize", "4096")
```

### Thrift Server Configuration

```scala
// Configure Hive Thrift Server with authentication
val spark = SparkSession.builder()
  .appName("Hive Thrift Server")
  .config("spark.sql.catalogImplementation", "hive")
  // Thrift server settings
  .config("spark.sql.hive.thriftServer.async", "true")
  .config("hive.server2.authentication", "KERBEROS")
  .config("hive.server2.authentication.kerberos.principal", "hive/_HOST@REALM.COM")
  .config("hive.server2.authentication.kerberos.keytab", "/etc/hive/hive.keytab")
  // Connection limits
  .config("hive.server2.thrift.max.worker.threads", "500")
  .config("hive.server2.session.check.interval", "60000")
  .enableHiveSupport()
  .getOrCreate()
```

### Dynamic Configuration Management

```scala
import org.apache.spark.sql.hive.HiveUtils

// Check if Hive is available
if (HiveUtils.isHiveAvailable(spark.sparkContext.getConf)) {
  println("Hive support is available")

  // Get current Hive version
  val hiveVersion = HiveUtils.hiveVersion(spark.sparkContext.getConf)
  println(s"Using Hive version: ${hiveVersion.fullVersion}")

  // Get warehouse location
  val warehouseLocation = HiveUtils.hiveWarehouseLocation(spark.sparkContext.getConf)
  println(s"Hive warehouse: $warehouseLocation")

  // Format Hive configurations
  val hiveConfigs = HiveUtils.formatHiveConfigs(spark.sparkContext.getConf)
  hiveConfigs.foreach { case (key, value) =>
    println(s"$key = $value")
  }
} else {
  println("Hive support is not available")
}
```

### Class Loading Configuration

```scala
// Configure class loading for Hive integration
spark.conf.set("spark.sql.hive.metastore.sharedPrefixes",
  "com.mysql.jdbc,org.postgresql,com.microsoft.sqlserver,oracle.jdbc")

spark.conf.set("spark.sql.hive.metastore.barrierPrefixes",
  "org.apache.hive.service.rpc.thrift,org.apache.hadoop.hive.metastore.api")

// Custom class loading strategy
val customClassLoader = HiveUtils.createHiveClassLoader(
  version = HiveVersion("3.1.2", 3, 1),
  sparkConf = spark.sparkContext.getConf,
  hadoopConf = spark.sparkContext.hadoopConfiguration
)
```

## Error Handling

Common configuration-related errors and their solutions:

```scala
import org.apache.spark.sql.AnalysisException

try {
  val spark = SparkSession.builder()
    .config("spark.sql.hive.metastore.version", "invalid-version")
    .enableHiveSupport()
    .getOrCreate()
} catch {
  case e: IllegalArgumentException if e.getMessage.contains("Unsupported Hive version") =>
    println("Invalid Hive version specified")
  case e: ClassNotFoundException =>
    println("Hive jars not found in classpath")
  case e: AnalysisException if e.getMessage.contains("Hive support is required") =>
    println("Hive support not properly configured")
}
```

## Types

### Configuration Types

```scala { .api }
case class HiveClientConfig(
  version: String,
  jarsStrategy: String,
  jarsPath: Seq[String],
  sharedPrefixes: Seq[String],
  barrierPrefixes: Seq[String]
)

case class MetastoreConfig(
  uris: String,
  principal: Option[String],
  keytab: Option[String],
  saslEnabled: Boolean
)

case class ThriftServerConfig(
  async: Boolean,
  authentication: String,
  maxWorkerThreads: Int,
  sessionCheckInterval: Long
)
```