Session Management

Core functionality for creating and managing Spark sessions with Hive integration, covering both the modern SparkSession-based approach and legacy HiveContext support.

Capabilities

SparkSession with Hive Support (Recommended)

Modern approach for enabling Hive integration using SparkSession builder pattern.

/**
 * Enable Hive support for SparkSession, providing access to Hive metastore,
 * HiveQL query execution, and Hive UDF/UDAF/UDTF functions
 */
def enableHiveSupport(): SparkSession.Builder

Usage Examples:

import org.apache.spark.sql.SparkSession

// Basic Hive-enabled session
val spark = SparkSession.builder()
  .appName("Hive Integration App")
  .enableHiveSupport()
  .getOrCreate()

// With additional configuration
val spark = SparkSession.builder()
  .appName("Advanced Hive App")
  .config("spark.sql.warehouse.dir", "/user/hive/warehouse")
  .config("spark.sql.hive.metastore.version", "2.3.0")
  .enableHiveSupport()
  .getOrCreate()

// Execute HiveQL
spark.sql("SHOW DATABASES").show()
spark.sql("USE my_database")
val result = spark.sql("SELECT * FROM my_table LIMIT 10")

HiveContext (Legacy, Deprecated)

⚠️ DEPRECATED: HiveContext has been deprecated since Spark 2.0.0 and should not be used in new applications. All of its functionality is available through SparkSession.builder().enableHiveSupport().

/**
 * Legacy Hive integration context, deprecated since Spark 2.0.0.
 * A thin wrapper around SparkSession, kept only for backwards compatibility.
 * Use SparkSession.builder.enableHiveSupport instead.
 */
@deprecated("Use SparkSession.builder.enableHiveSupport instead", "2.0.0")
class HiveContext private[hive](_sparkSession: SparkSession) extends SQLContext(_sparkSession) {
  
  /**
   * Create HiveContext from SparkContext
   */
  def this(sc: SparkContext)
  
  /**
   * Create HiveContext from JavaSparkContext  
   */
  def this(sc: JavaSparkContext)
  
  /**
   * Create new HiveContext session with separated SQLConf, UDF/UDAF,
   * temporary tables and SessionState, but sharing CacheManager,
   * IsolatedClientLoader and Hive client
   */
  override def newSession(): HiveContext
  
  /**
   * Invalidate and refresh cached metadata for the given table
   * @param tableName - Name of table to refresh
   */
  def refreshTable(tableName: String): Unit
}

Usage Examples (DO NOT USE - Deprecated):

// ❌ DEPRECATED - DO NOT USE IN NEW CODE
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

// Create HiveContext (deprecated approach)
val conf = new SparkConf().setAppName("Hive Legacy App")
val sc = new SparkContext(conf)
val hiveContext = new HiveContext(sc)

// Execute queries
val result = hiveContext.sql("SELECT * FROM my_table")
result.show()

// Refresh table metadata
hiveContext.refreshTable("my_table")

// Create new session
val newSession = hiveContext.newSession()

✅ Use This Instead:

import org.apache.spark.sql.SparkSession

// Modern approach (recommended)
val spark = SparkSession.builder()
  .appName("Modern Hive App")
  .enableHiveSupport()
  .getOrCreate()

// Execute queries (same API)
val result = spark.sql("SELECT * FROM my_table")
result.show()

// Refresh table metadata
spark.catalog.refreshTable("my_table")

// Create new session
val newSession = spark.newSession()

Session State and Resource Management

Components for managing Hive-aware session state and resources.

/**
 * Builder for Hive-aware SessionState
 */
class HiveSessionStateBuilder(
  session: SparkSession,
  parentState: Option[SessionState] = None
) extends BaseSessionStateBuilder(session, parentState)

/**
 * Hive-aware resource loader for adding JARs to both Spark and Hive
 */
class HiveSessionResourceLoader(sparkSession: SparkSession) extends SessionResourceLoader(sparkSession) {
  /**
   * Add JAR to both Spark SQL and Hive client classpaths
   * @param path - Path to JAR file
   */
  override def addJar(path: String): Unit
}
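
The resource loader itself is internal, but its effect is visible through the public ADD JAR command: a JAR added through Spark SQL is registered with both the Spark SQL and Hive client classpaths. A minimal sketch (the JAR path and UDF class name are hypothetical placeholders):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("Add JAR Example")
  .enableHiveSupport()
  .getOrCreate()

// ADD JAR goes through the session's resource loader, which registers the
// JAR with both Spark SQL and the underlying Hive client
spark.sql("ADD JAR /path/to/my-udfs.jar")  // hypothetical path

// Hive UDF classes in the JAR are then resolvable from HiveQL
spark.sql("CREATE TEMPORARY FUNCTION my_upper AS 'com.example.MyUpperUDF'")  // hypothetical class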

Configuration Integration:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.hive.HiveUtils

// Configure with Hive-specific settings
val spark = SparkSession.builder()
  .appName("Configured Hive App")
  .config(HiveUtils.HIVE_METASTORE_VERSION.key, "2.3.0")
  .config(HiveUtils.CONVERT_METASTORE_PARQUET.key, "true")
  .config(HiveUtils.CONVERT_METASTORE_ORC.key, "true")
  .enableHiveSupport()
  .getOrCreate()

// Access session-level catalog
val catalog = spark.catalog
catalog.listDatabases().show()
catalog.listTables("default").show()

Session Utility Methods

Helper methods for session management and configuration.

object HiveUtils {
  /**
   * Configure SparkContext with Hive external catalog support
   * @param sc - SparkContext to configure
   * @return Configured SparkContext
   */
  def withHiveExternalCatalog(sc: SparkContext): SparkContext
}
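
withHiveExternalCatalog is an internal helper that flags a SparkContext to use the Hive external catalog. In application code the same effect comes from enableHiveSupport(), which sets the spark.sql.catalogImplementation flag on the session. A minimal sketch, assuming a local run:

import org.apache.spark.sql.SparkSession

// enableHiveSupport() sets the same catalog-implementation flag that
// withHiveExternalCatalog applies to a SparkContext
val spark = SparkSession.builder()
  .appName("Catalog Config Example")
  .master("local[*]")  // assumption: local run for illustration
  .enableHiveSupport()
  .getOrCreate()

// Confirm which catalog implementation the session resolved to
println(spark.conf.get("spark.sql.catalogImplementation"))  // prints "hive"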

Session Lifecycle Management:

import org.apache.spark.sql.SparkSession

// Create session
val spark = SparkSession.builder()
  .appName("Hive Session Lifecycle")
  .enableHiveSupport()
  .getOrCreate()

try {
  // Use session for Hive operations
  spark.sql("SHOW TABLES").show()
  
  // Create new session (shares metastore connection)
  val newSession = spark.newSession()
  newSession.sql("USE another_database")
  
} finally {
  // Clean up
  spark.stop()
}

Error Handling

Common exceptions and error handling patterns:

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException

try {
  val spark = SparkSession.builder()
    .enableHiveSupport()
    .getOrCreate()
    
  spark.sql("SELECT * FROM non_existent_table")
} catch {
  // NoSuchTableException extends AnalysisException, so it must be matched
  // first; placing the AnalysisException case earlier would make it unreachable
  case e: NoSuchTableException =>
    println(s"Table not found: ${e.getMessage}")
  case e: AnalysisException =>
    println(s"Analysis error: ${e.getMessage}")
  case e: Exception =>
    println(s"Unexpected error: ${e.getMessage}")
}
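
Because NoSuchTableException is a subtype of AnalysisException, catch ordering matters; an alternative is to check for the table up front with the public Catalog API:

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .enableHiveSupport()
  .getOrCreate()

// Defensive check via the Catalog API instead of exception-driven control flow
if (spark.catalog.tableExists("my_database", "my_table")) {
  spark.sql("SELECT * FROM my_database.my_table LIMIT 10").show()
} else {
  println("Table my_database.my_table does not exist")
}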

Migration from HiveContext to SparkSession

For migrating legacy code from HiveContext to SparkSession:

// OLD (Deprecated)
import org.apache.spark.sql.hive.HiveContext
val hiveContext = new HiveContext(sparkContext)
val df = hiveContext.sql("SELECT * FROM table")

// NEW (Recommended)
import org.apache.spark.sql.SparkSession
val spark = SparkSession.builder()
  .enableHiveSupport()
  .getOrCreate()  // reuses the active SparkContext automatically
val df = spark.sql("SELECT * FROM table")

Note that getOrCreate() picks up an already-running SparkContext on its own; the Builder.sparkContext(...) method is private to Spark and is not available to application code.