Python client for the Impala distributed query engine and HiveServer2 implementations
—
Comprehensive exception hierarchy following DB API 2.0 standards, with specialized exceptions for RPC communication, HiveServer2-specific errors, and transport issues.
Foundation exception classes providing the base for all database-related errors.
class Error(Exception):
    """Base exception class for all database errors."""
class Warning(Exception):
    """Exception for important warnings like data truncations."""
    # Within this module the name shadows the built-in ``Warning``.


# Standard database exception hierarchy as defined by DB API 2.0 (PEP 249).
class InterfaceError(Error):
    """Exception for interface-related errors.

    Raised for errors related to the database interface rather than
    the database itself.
    """
class DatabaseError(Error):
    """Exception for database-related errors.

    Base class for all errors related to the database operation.
    """
class InternalError(DatabaseError):
    """Exception for internal database errors.

    Raised when the database encounters an internal error.
    """
class OperationalError(DatabaseError):
    """Exception for operational errors.

    Raised for errors related to database operation and not necessarily
    under user control (connection lost, memory allocation error, etc.).
    """
class ProgrammingError(DatabaseError):
    """Exception for programming errors.

    Raised for errors due to problems with the SQL statement or
    parameters (table not found, syntax error, wrong number of
    parameters, etc.).
    """
class IntegrityError(DatabaseError):
    """Exception for data integrity errors.

    Raised when data integrity of the database is affected
    (foreign key check fails, duplicate key, etc.).
    """
class DataError(DatabaseError):
    """Exception for data-related errors.

    Raised for errors due to problems with the processed data
    (division by zero, numeric value out of range, etc.).
    """
class NotSupportedError(DatabaseError):
    """Exception for unsupported operations.

    Raised when a method or database API was used which is not
    supported by the database.
    """


# Specialized exceptions for HiveServer2 protocol and RPC communication errors.
class RPCError(Error):
    """Exception for RPC communication errors.

    Base class for errors that occur during Thrift RPC communication
    with the HiveServer2 service.
    """
class HiveServer2Error(RPCError):
    """Exception for HiveServer2-specific errors.

    Raised for errors specific to HiveServer2 operations
    and protocol handling.
    """
class HttpError(RPCError):
    """Exception for HTTP transport errors.

    Raised when errors occur during HTTP-based transport
    communication with HiveServer2.
    """
class BeeswaxError(RPCError):
    """Exception for Beeswax protocol errors.

    Raised for errors related to the legacy Beeswax protocol
    (mainly for older Hive versions).
    """
class QueryStateError(BeeswaxError):
    """Exception for query state errors.

    Raised when query execution encounters state-related issues
    (query cancelled, timeout, invalid state transitions).
    """
class DisconnectedError(BeeswaxError):
    """Exception for connection disconnection errors.

    Raised when the connection to HiveServer2 is unexpectedly
    lost or cannot be established.
    """


from impala.dbapi import connect
from impala.error import Error, OperationalError, ProgrammingError
# Start with empty handles so the cleanup in ``finally`` can tell what was
# actually opened without inspecting ``locals()``.
conn = None
cursor = None
try:
    conn = connect(host='impala-host', port=21050)
    cursor = conn.cursor()
    # This might raise ProgrammingError for SQL syntax issues
    cursor.execute("SELECT * FROM non_existent_table")
    results = cursor.fetchall()
except ProgrammingError as e:
    print(f"SQL Programming Error: {e}")
except OperationalError as e:
    print(f"Operational Error (connection, etc.): {e}")
except Error as e:
    # Catch-all for the remaining errors rooted at ``Error``; listed last
    # so the more specific handlers above get the first chance.
    print(f"General Database Error: {e}")
finally:
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()


from impala.dbapi import connect
from impala.error import OperationalError, DisconnectedError, RPCError
def robust_connect(host, port, max_retries=3):
    """Connect with retry logic and proper error handling.

    Args:
        host: Host name passed through to ``connect``.
        port: Port passed through to ``connect``.
        max_retries: Total number of connection attempts; must be >= 1.

    Returns:
        The connection returned by the first successful ``connect`` call.

    Raises:
        ValueError: If ``max_retries`` is less than 1.
        DisconnectedError, RPCError, OperationalError: Re-raised unchanged
            when the final attempt fails.
    """
    if max_retries < 1:
        # Without this guard the loop body never runs and the function
        # would silently return None instead of a connection.
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            return connect(host=host, port=port, timeout=10)
        except DisconnectedError as e:
            # Must precede RPCError: DisconnectedError derives from it.
            print(f"Connection attempt {attempt + 1} failed: {e}")
            if is_last_attempt:
                raise
        except RPCError as e:
            print(f"RPC Error on attempt {attempt + 1}: {e}")
            if is_last_attempt:
                raise
        except OperationalError as e:
            print(f"Operational error on attempt {attempt + 1}: {e}")
            if is_last_attempt:
                raise
# Usage
try:
    connection = robust_connect('impala-cluster.example.com', 21050)
    print("Successfully connected to Impala")
except Exception as e:
    # Top-level boundary of the example: report whatever the final
    # attempt raised.
    print(f"Failed to connect after retries: {e}")


from impala.dbapi import connect
from impala.error import (
ProgrammingError, DataError, QueryStateError,
NotSupportedError, InternalError
)
def execute_with_error_handling(cursor, query, params=None):
    """Execute query with comprehensive error handling.

    Args:
        cursor: Object providing ``execute(query, params)`` and
            ``fetchall()``.
        query: SQL text handed to ``cursor.execute``.
        params: Optional query parameters, forwarded unchanged.

    Returns:
        Whatever ``cursor.fetchall()`` returns.

    Raises:
        ProgrammingError, DataError, QueryStateError, NotSupportedError,
        InternalError: Each is reported with ``print`` and then re-raised
            unchanged. Any other exception propagates without a message.
    """
    try:
        cursor.execute(query, params)
        return cursor.fetchall()
    except ProgrammingError as e:
        # SQL syntax errors, table not found, etc.
        print(f"SQL Programming Error: {e}")
        print("Check your SQL syntax and table/column names")
        raise
    except DataError as e:
        # Data type issues, value out of range, etc.
        print(f"Data Error: {e}")
        print("Check your data types and value ranges")
        raise
    except QueryStateError as e:
        # Query was cancelled or timed out
        print(f"Query State Error: {e}")
        print("Query may have been cancelled or timed out")
        raise
    except NotSupportedError as e:
        # Unsupported SQL features
        print(f"Feature Not Supported: {e}")
        print("This SQL feature is not supported by Impala/Hive")
        raise
    except InternalError as e:
        # Internal database errors
        print(f"Internal Database Error: {e}")
        print("This appears to be an internal database issue")
        raise
# Usage
conn = connect(host='impala-host', port=21050)
try:
    # The cursor is opened inside the outer ``try`` so the connection is
    # still closed if ``conn.cursor()`` itself fails.
    cursor = conn.cursor()
    try:
        results = execute_with_error_handling(
            cursor,
            "SELECT * FROM sales WHERE date > %(start_date)s",
            {'start_date': '2023-01-01'}
        )
        for row in results:
            print(row)
    finally:
        cursor.close()
finally:
    conn.close()


from impala.dbapi import connect
from impala.error import InterfaceError, OperationalError
def connect_with_auth_fallback(host, port, auth_configs):
    """Try multiple authentication mechanisms.

    Each configuration is tried in order: a connection is opened and
    probed with ``SELECT 1``. The first connection that passes the probe
    is returned.

    Args:
        host: Host name passed through to ``connect``.
        port: Port passed through to ``connect``.
        auth_configs: Iterable of dicts of keyword arguments for
            ``connect``; each must contain an ``'auth_mechanism'`` key.

    Returns:
        The first connection whose probe query succeeds.

    Raises:
        Exception: If every configuration fails (or none is given). The
            last ``InterfaceError``/``OperationalError`` seen, if any, is
            attached as ``__cause__``.
    """
    last_error = None
    for auth_config in auth_configs:
        mechanism = auth_config['auth_mechanism']
        try:
            print(f"Trying authentication: {mechanism}")
            conn = connect(host=host, port=port, **auth_config)
            try:
                # Test the connection
                cursor = conn.cursor()
                try:
                    cursor.execute("SELECT 1")
                    cursor.fetchone()
                finally:
                    cursor.close()
            except BaseException:
                # The probe failed: release the half-usable connection
                # before moving on, otherwise it is leaked.
                conn.close()
                raise
            print(f"Successfully authenticated with: {mechanism}")
            return conn
        except InterfaceError as e:
            print(f"Interface error with {mechanism}: {e}")
            last_error = e
        except OperationalError as e:
            print(f"Auth failed with {mechanism}: {e}")
            last_error = e
    raise Exception("All authentication methods failed") from last_error
# Usage
auth_methods = [
    {'auth_mechanism': 'GSSAPI'},  # Try Kerberos first
    {'auth_mechanism': 'LDAP', 'user': 'username', 'password': 'password'},
    {'auth_mechanism': 'NOSASL'},  # Fallback to no auth
]
try:
    connection = connect_with_auth_fallback(
        'impala-host', 21050, auth_methods
    )
except Exception as e:
    print(f"Authentication failed: {e}")


from impala.dbapi import connect
from impala.error import HttpError, OperationalError
# Start with empty handles so the cleanup in ``finally`` can tell what was
# actually opened without inspecting ``locals()``.
conn = None
cursor = None
try:
    # Connect using HTTP transport
    conn = connect(
        host='impala-gateway.example.com',
        port=28000,  # HTTP port
        use_http_transport=True,
        http_path='cliservice',
        use_ssl=True
    )
    cursor = conn.cursor()
    cursor.execute("SELECT version()")
    result = cursor.fetchone()
    print(f"Connected via HTTP: {result[0]}")
except HttpError as e:
    print(f"HTTP Transport Error: {e}")
    print("Check HTTP transport configuration and proxy settings")
except OperationalError as e:
    print(f"Connection Error: {e}")
    print("Check host, port, and network connectivity")
finally:
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()


# Install with Tessl CLI
npx tessl i tessl/pypi-impyla