Apache Spark Hive Thrift Server provides HiveServer2 compatibility for Spark SQL, enabling JDBC/ODBC connectivity and Hive CLI compatibility for Spark SQL queries
—
Comprehensive CLI service implementation providing HiveServer2 compatibility with Spark SQL enhancements, including authentication, session management, and operation handling.
Core CLI service that extends HiveServer2's CLIService with Spark SQL capabilities.
/**
* Core CLI service implementation with Spark SQL integration
* @param hiveServer The parent HiveServer2 instance
* @param sqlContext The Spark SQL context for query execution
*/
class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext) extends CLIService(hiveServer) {
/**
* Initialize the CLI service with Hive configuration
* @param hiveConf Hive configuration object containing server settings
*/
def init(hiveConf: HiveConf): Unit
/**
* Start the CLI service and all composite services
*/
def start(): Unit
/**
* Get server information for a given session
* @param sessionHandle Handle identifying the client session
* @param getInfoType Type of information to retrieve
* @return GetInfoValue containing the requested information
*/
def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue
}

Usage Examples:
import org.apache.spark.sql.hive.thriftserver.SparkSQLCLIService
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hive.service.server.HiveServer2
// Create and initialize the CLI service
val hiveServer = new HiveServer2()
val cliService = new SparkSQLCLIService(hiveServer, SparkSQLEnv.sqlContext)
// Initialize with configuration
val hiveConf = new HiveConf()
cliService.init(hiveConf)
// Start the service
cliService.start()

The CLI service provides enhanced information handling specific to Spark SQL.
// Information types supported by getInfo()
import org.apache.hive.service.cli.GetInfoType
// Spark SQL specific information
GetInfoType.CLI_SERVER_NAME // Returns "Spark SQL"
GetInfoType.CLI_DBMS_NAME // Returns "Spark SQL"
GetInfoType.CLI_DBMS_VER // Returns Spark version
GetInfoType.CLI_ODBC_KEYWORDS // Returns SQL keywords supported by Spark

Usage Examples:
import org.apache.hive.service.cli.{GetInfoType, SessionHandle}
// Get server information
val serverName = cliService.getInfo(sessionHandle, GetInfoType.CLI_SERVER_NAME)
println(s"Server: ${serverName.getStringValue()}") // "Spark SQL"
val version = cliService.getInfo(sessionHandle, GetInfoType.CLI_DBMS_VER)
println(s"Version: ${version.getStringValue()}") // "3.5.6"
val keywords = cliService.getInfo(sessionHandle, GetInfoType.CLI_ODBC_KEYWORDS)
println(s"Keywords: ${keywords.getStringValue()}") // "SELECT,FROM,WHERE,..."

The CLI service integrates with multiple authentication mechanisms.
// Authentication configuration in init()
// Kerberos authentication
hive.server2.authentication = "KERBEROS"
hive.server2.authentication.kerberos.principal = "spark/_HOST@REALM"
hive.server2.authentication.kerberos.keytab = "/etc/spark/spark.keytab"
// SPNEGO for HTTP transport
hive.server2.authentication.spnego.principal = "HTTP/_HOST@REALM"
hive.server2.authentication.spnego.keytab = "/etc/spark/spnego.keytab"
// Custom authentication providers
hive.server2.authentication = "CUSTOM"
hive.server2.custom.authentication.class = "com.example.MyAuthProvider"

Utility trait providing reflection-based service management for HiveServer2 compatibility.
/**
* Trait providing composite service functionality via reflection
* Used to maintain compatibility with HiveServer2 internal APIs
*/
trait ReflectedCompositeService { this: AbstractService =>
/**
* Initialize all composite services with the given configuration
* @param hiveConf Hive configuration object
*/
def initCompositeService(hiveConf: HiveConf): Unit
/**
* Start all composite services in order
*/
def startCompositeService(): Unit
}

The core interface defining all CLI service operations, implemented by SparkSQLCLIService.
/**
* Core CLI service interface defining all thrift server operations
*/
public interface ICLIService {
/**
* Open a new client session
* @param username Client username
* @param password Client password
* @param configuration Session configuration parameters
* @return SessionHandle identifying the new session
*/
SessionHandle openSession(String username, String password, Map<String, String> configuration) throws HiveSQLException;
/**
* Open a session with user impersonation
* @param username Client username
* @param password Client password
* @param configuration Session configuration parameters
* @param delegationToken Delegation token for authentication
* @return SessionHandle identifying the new session
*/
SessionHandle openSessionWithImpersonation(String username, String password, Map<String, String> configuration, String delegationToken) throws HiveSQLException;
/**
* Close an existing session
* @param sessionHandle Handle identifying the session to close
*/
void closeSession(SessionHandle sessionHandle) throws HiveSQLException;
/**
* Get server or session information
* @param sessionHandle Session handle
* @param infoType Type of information to retrieve
* @return GetInfoValue containing the requested information
*/
GetInfoValue getInfo(SessionHandle sessionHandle, GetInfoType infoType) throws HiveSQLException;
/**
* Execute a SQL statement synchronously
* @param sessionHandle Session handle
* @param statement SQL statement to execute
* @param confOverlay Configuration overrides for this statement
* @return OperationHandle for the execution operation
*/
OperationHandle executeStatement(SessionHandle sessionHandle, String statement, Map<String, String> confOverlay) throws HiveSQLException;
/**
* Execute a SQL statement asynchronously
* @param sessionHandle Session handle
* @param statement SQL statement to execute
* @param confOverlay Configuration overrides for this statement
* @return OperationHandle for the execution operation
*/
OperationHandle executeStatementAsync(SessionHandle sessionHandle, String statement, Map<String, String> confOverlay) throws HiveSQLException;
}

The CLI service manages the complete lifecycle of server components.
// Service states (from Apache Hive Service interface)
enum SERVICE_STATE {
NOTINITED, // Service not initialized
INITED, // Service initialized but not started
STARTED, // Service started and running
STOPPED // Service stopped
}
// Service lifecycle methods
abstract class AbstractService {
def init(conf: Configuration): Unit
def start(): Unit
def stop(): Unit
def getServiceState(): SERVICE_STATE
}

Usage Examples:
// Check service state
val state = cliService.getServiceState()
state match {
case SERVICE_STATE.STARTED => println("Service is running")
case SERVICE_STATE.STOPPED => println("Service is stopped")
case _ => println(s"Service state: $state")
}
// Proper service shutdown
try {
if (cliService.getServiceState() == SERVICE_STATE.STARTED) {
cliService.stop()
}
} catch {
case e: Exception =>
println(s"Error stopping CLI service: ${e.getMessage}")
}

The CLI service provides comprehensive error handling for all operations.
/**
* Base exception for all CLI service errors
*/
class HiveSQLException extends SQLException {
public HiveSQLException(String reason, String sqlState, int vendorCode, Throwable cause)
public String getSqlState()
public int getVendorCode()
}
/**
* Service-level exceptions
*/
class ServiceException extends Exception {
public ServiceException(String message)
public ServiceException(String message, Throwable cause)
}

Common Error Scenarios:
import org.apache.hive.service.cli.HiveSQLException
try {
val sessionHandle = cliService.openSession("user", "pass", Map.empty.asJava)
// Use session...
} catch {
case e: HiveSQLException =>
println(s"SQL Error: ${e.getMessage}, State: ${e.getSqlState}")
case e: ServiceException =>
println(s"Service Error: ${e.getMessage}")
}

Install with Tessl CLI
npx tessl i tessl/maven-org-apache-spark--spark-hive-thriftserver-2-12