Spark Project Hive Thrift Server - A Thrift server implementation that provides JDBC/ODBC access to Spark SQL
npx @tessl/cli install tessl/maven-org-apache-spark--spark-hive-thriftserver_2-11@2.4.0

Apache Spark Hive Thrift Server provides JDBC/ODBC access to Spark SQL through the HiveServer2 protocol, enabling remote clients to execute SQL queries against Spark clusters using standard database connectivity tools and BI applications.
import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2
import org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver
import org.apache.spark.sql.SQLContext
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
// Create Spark SQL context
val conf = new SparkConf().setAppName("ThriftServer")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)
// Start the thrift server
HiveThriftServer2.startWithContext(sqlContext)

# Start Thrift Server
$SPARK_HOME/sbin/start-thriftserver.sh --master spark://master:7077
# Start CLI
$SPARK_HOME/bin/spark-sql

The Spark Hive Thrift Server consists of several key components:
Main entry points for starting and managing the Thrift Server with lifecycle control and configuration.
object HiveThriftServer2 {
def startWithContext(sqlContext: SQLContext): Unit
def main(args: Array[String]): Unit
var uiTab: Option[ThriftServerTab]
var listener: HiveThriftServer2Listener
}

Command-line interface for interactive SQL execution with Spark SQL integration.
object SparkSQLCLIDriver {
def main(args: Array[String]): Unit
def installSignalHandler(): Unit
}

Session lifecycle management with SQL context handling and client connection management.
class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: SQLContext) extends SessionManager {
def openSession(protocol: TProtocolVersion, username: String, passwd: String,
ipAddress: String, sessionConf: java.util.Map[String, String],
withImpersonation: Boolean, delegationToken: String): SessionHandle
def closeSession(sessionHandle: SessionHandle): Unit
}

SQL query execution operations with result handling and asynchronous processing support.
class SparkSQLOperationManager extends OperationManager {
val sessionToActivePool: ConcurrentHashMap[SessionHandle, String]
val sessionToContexts: ConcurrentHashMap[SessionHandle, SQLContext]
def newExecuteStatementOperation(parentSession: HiveSession, statement: String,
confOverlay: JMap[String, String], async: Boolean): ExecuteStatementOperation
}

Web-based monitoring interface for active sessions, query execution, and server performance metrics.
class ThriftServerTab(sparkContext: SparkContext) extends SparkUITab {
val name: String = "JDBC/ODBC Server"
def detach(): Unit
}

Spark SQL environment initialization and cleanup with configuration management.
object SparkSQLEnv {
var sqlContext: SQLContext
var sparkContext: SparkContext
def init(): Unit
def stop(): Unit
}

// Session information tracking
class SessionInfo(sessionId: String, startTimestamp: Long, ip: String, userName: String) {
var finishTimestamp: Long
var totalExecution: Int
def totalTime: Long
}
// Query execution tracking
class ExecutionInfo(statement: String, sessionId: String, startTimestamp: Long, userName: String) {
var finishTimestamp: Long
var executePlan: String
var detail: String
var state: ExecutionState.Value
val jobId: ArrayBuffer[String]
var groupId: String
def totalTime: Long
}
// Execution states
object ExecutionState extends Enumeration {
val STARTED, COMPILED, FAILED, FINISHED = Value
type ExecutionState = Value
}
// Server listener for events
class HiveThriftServer2Listener(server: HiveServer2, conf: SQLConf) extends SparkListener {
def getOnlineSessionNum: Int
def getTotalRunning: Int
def getSessionList: Seq[SessionInfo]
def getSession(sessionId: String): Option[SessionInfo]
def getExecutionList: Seq[ExecutionInfo]
}

// From Hive Service API
import org.apache.hive.service.cli.SessionHandle
import org.apache.hive.service.cli.OperationHandle
import org.apache.hive.service.cli.thrift.TProtocolVersion
import org.apache.hive.service.server.HiveServer2
import org.apache.hadoop.hive.conf.HiveConf

Configuration options:
- hive.server2.transport.mode: "binary" or "http"
- hive.server2.thrift.port: Server port (default: 10000)
- hive.server2.thrift.bind.host: Bind address
- spark.sql.hive.thriftServer.singleSession: Share single session
- spark.sql.thriftServer.incrementalCollect: Incremental result collection