Core functionality for creating and managing Spark sessions with Hive integration, covering both the modern SparkSession-based approach and the legacy HiveContext API.
Modern approach for enabling Hive integration using SparkSession builder pattern.
/**
* Enable Hive support for SparkSession, providing access to Hive metastore,
* HiveQL query execution, and Hive UDF/UDAF/UDTF functions
*/
def enableHiveSupport(): SparkSession.Builder
Usage Examples:
import org.apache.spark.sql.SparkSession
// Basic Hive-enabled session
val spark = SparkSession.builder()
.appName("Hive Integration App")
.enableHiveSupport()
.getOrCreate()
// With additional configuration
val spark = SparkSession.builder()
.appName("Advanced Hive App")
.config("spark.sql.warehouse.dir", "/user/hive/warehouse")
.config("spark.sql.hive.metastore.version", "2.3.0")
.enableHiveSupport()
.getOrCreate()
// Execute HiveQL
spark.sql("SHOW DATABASES").show()
spark.sql("USE my_database")
val result = spark.sql("SELECT * FROM my_table LIMIT 10")
⚠️ DEPRECATED: HiveContext has been deprecated since Spark 2.0.0 and should not be used in new applications. All of its functionality is available through SparkSession.builder().enableHiveSupport().
/**
* Legacy Hive integration context - DEPRECATED since 2.0.0.
* This class is a thin wrapper around SparkSession and will be removed in a future release.
* Use SparkSession.builder.enableHiveSupport instead.
*/
@deprecated("Use SparkSession.builder.enableHiveSupport instead", "2.0.0")
class HiveContext private[hive](_sparkSession: SparkSession) extends SQLContext(_sparkSession) {
/**
* Create HiveContext from SparkContext
*/
def this(sc: SparkContext)
/**
* Create HiveContext from JavaSparkContext
*/
def this(sc: JavaSparkContext)
/**
* Create new HiveContext session with separated SQLConf, UDF/UDAF,
* temporary tables and SessionState, but sharing CacheManager,
* IsolatedClientLoader and Hive client
*/
override def newSession(): HiveContext
/**
* Invalidate and refresh cached metadata for the given table
* @param tableName - Name of table to refresh
*/
def refreshTable(tableName: String): Unit
}
Usage Examples (DO NOT USE - Deprecated):
// ❌ DEPRECATED - DO NOT USE IN NEW CODE
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext
// Create HiveContext (deprecated approach)
val conf = new SparkConf().setAppName("Hive Legacy App")
val sc = new SparkContext(conf)
val hiveContext = new HiveContext(sc)
// Execute queries
val result = hiveContext.sql("SELECT * FROM my_table")
result.show()
// Refresh table metadata
hiveContext.refreshTable("my_table")
// Create new session
val newSession = hiveContext.newSession()
✅ Use This Instead:
import org.apache.spark.sql.SparkSession
// Modern approach (recommended)
val spark = SparkSession.builder()
.appName("Modern Hive App")
.enableHiveSupport()
.getOrCreate()
// Execute queries (same API)
val result = spark.sql("SELECT * FROM my_table")
result.show()
// Refresh table metadata
spark.catalog.refreshTable("my_table")
// Create new session
val newSession = spark.newSession()
Components for managing Hive-aware session state and resources.
/**
* Builder for Hive-aware SessionState
*/
class HiveSessionStateBuilder(
session: SparkSession,
parentState: Option[SessionState] = None
) extends BaseSessionStateBuilder(session, parentState)
/**
* Hive-aware resource loader for adding JARs to both Spark and Hive
*/
class HiveSessionResourceLoader(sparkSession: SparkSession) extends SessionResourceLoader(sparkSession) {
/**
* Add JAR to both Spark SQL and Hive client classpaths
* @param path - Path to JAR file
*/
override def addJar(path: String): Unit
}
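The resource loader is normally exercised indirectly through the ADD JAR SQL command, which goes through the session's resource loader so that, with Hive support enabled, the JAR also reaches the Hive client classpath. A minimal sketch; the JAR path, UDF class, and function name below are hypothetical:
import org.apache.spark.sql.SparkSession
val spark = SparkSession.builder()
.appName("Hive Resource Loader Example")
.enableHiveSupport()
.getOrCreate()
// ADD JAR is dispatched to the session resource loader; in a Hive-enabled
// session the JAR is also registered with the Hive client classpath
spark.sql("ADD JAR /path/to/custom-serde.jar")
// A Hive UDF shipped in that JAR can then be registered and used in HiveQL
spark.sql("CREATE TEMPORARY FUNCTION my_upper AS 'com.example.hive.MyUpperUDF'")
spark.sql("SELECT my_upper(name) FROM my_table").show()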
Configuration Integration:
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.hive.HiveUtils
// Configure with Hive-specific settings
val spark = SparkSession.builder()
.appName("Configured Hive App")
.config(HiveUtils.HIVE_METASTORE_VERSION.key, "2.3.0")
.config(HiveUtils.CONVERT_METASTORE_PARQUET.key, "true")
.config(HiveUtils.CONVERT_METASTORE_ORC.key, "true")
.enableHiveSupport()
.getOrCreate()
// Access session-level catalog
val catalog = spark.catalog
catalog.listDatabases().show()
catalog.listTables("default").show()
Helper methods for session management and configuration.
object HiveUtils {
/**
* Configure SparkContext with Hive external catalog support
* @param sc - SparkContext to configure
* @return Configured SparkContext
*/
def withHiveExternalCatalog(sc: SparkContext): SparkContext
}
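withHiveExternalCatalog is mainly useful when starting from an existing SparkContext: it flags the context to use the Hive external catalog, which is effectively the same switch that enableHiveSupport() sets through the builder. A minimal sketch, assuming HiveUtils is accessible from your code (it lives in the spark-hive module); the app name is illustrative:
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.hive.HiveUtils
// Mark an existing SparkContext as using the Hive external catalog
val sc = new SparkContext(new SparkConf().setAppName("Hive Catalog Helper"))
HiveUtils.withHiveExternalCatalog(sc)
// A session built on top of this context resolves tables through the Hive metastore
val spark = SparkSession.builder().getOrCreate()
spark.sql("SHOW DATABASES").show()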
Session Lifecycle Management:
import org.apache.spark.sql.SparkSession
// Create session
val spark = SparkSession.builder()
.appName("Hive Session Lifecycle")
.enableHiveSupport()
.getOrCreate()
try {
// Use session for Hive operations
spark.sql("SHOW TABLES").show()
// Create new session (shares metastore connection)
val newSession = spark.newSession()
newSession.sql("USE another_database")
} finally {
// Clean up
spark.stop()
}
Common exceptions and error handling patterns:
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
try {
val spark = SparkSession.builder()
.enableHiveSupport()
.getOrCreate()
spark.sql("SELECT * FROM non_existent_table")
} catch {
// Match the more specific NoSuchTableException before its parent AnalysisException
case e: NoSuchTableException =>
println(s"Table not found: ${e.getMessage}")
case e: AnalysisException =>
println(s"Analysis error: ${e.getMessage}")
case e: Exception =>
println(s"Unexpected error: ${e.getMessage}")
}
For migrating legacy code from HiveContext to SparkSession:
// OLD (Deprecated)
import org.apache.spark.sql.hive.HiveContext
val hiveContext = new HiveContext(sparkContext)
val df = hiveContext.sql("SELECT * FROM table")
// NEW (Recommended)
import org.apache.spark.sql.SparkSession
// getOrCreate() reuses the existing SparkContext automatically, so it does not need to be passed in
val spark = SparkSession.builder()
.enableHiveSupport()
.getOrCreate()
val df = spark.sql("SELECT * FROM table")