Python client for the Impala distributed query engine and HiveServer2 implementations
npx @tessl/cli install tessl/pypi-impyla@0.22.0

A comprehensive Python client library for HiveServer2 implementations, specifically designed for the Impala and Hive distributed query engines. Impyla provides full DB API 2.0 (PEP 249) compliance, so it works like other Python database clients (such as those for SQLite or MySQL), while also supporting advanced features like Kerberos authentication, LDAP, SSL connections, and SQLAlchemy integration.
pip install impyla

Standard DB API import:
import impala.dbapi as dbapi

Direct connection import:
from impala.dbapi import connect

Error handling:
from impala.error import Error, DatabaseError, OperationalError

from impala.dbapi import connect
# Connect to Impala
conn = connect(host='your-impala-host', port=21050)
cursor = conn.cursor()
# Execute a query
cursor.execute('SELECT * FROM my_table LIMIT 10')
results = cursor.fetchall()
# Print results
for row in results:
print(row)
# Clean up
cursor.close()
conn.close()

Impyla follows the DB API 2.0 specification with these key components:
The library serves as a bridge between Python applications and big data platforms, enabling seamless integration with the Python data science ecosystem through pandas DataFrame conversion capabilities.
Full DB API 2.0 compliant interface providing connection management, query execution, and result fetching. Supports all standard database operations with comprehensive parameter binding and transaction control.
def connect(host='localhost', port=21050, database=None, timeout=None,
use_ssl=False, ca_cert=None, auth_mechanism='NOSASL', user=None,
password=None, kerberos_service_name='impala', use_ldap=None,
ldap_user=None, ldap_password=None, use_kerberos=None,
protocol=None, krb_host=None, use_http_transport=False,
http_path='', auth_cookie_names=None, http_cookie_names=None,
retries=3, jwt=None, user_agent=None,
get_user_custom_headers_func=None):
"""
Get a connection to HiveServer2 (HS2).
Returns:
HiveServer2Connection: Connection object implementing DB API 2.0
"""

Comprehensive exception hierarchy following DB API 2.0 standards, with specialized exceptions for RPC communication, HiveServer2-specific errors, and transport issues.
class Error(Exception):
"""Base exception class for all database errors."""
class DatabaseError(Error):
"""Exception for database-related errors."""
class OperationalError(DatabaseError):
"""Exception for operational errors (connection issues, etc.)."""
class ProgrammingError(DatabaseError):
"""Exception for programming errors (SQL syntax, etc.)."""

Utility functions for data conversion and integration with the Python data science ecosystem, including pandas DataFrame conversion and database management functions.
def as_pandas(cursor, coerce_float=False):
"""
Convert cursor results to pandas DataFrame.
Parameters:
cursor: Active cursor with executed query
coerce_float (bool): Coerce numeric columns to float
Returns:
pandas.DataFrame: Results as DataFrame
"""

SQLAlchemy dialect support for Impala, enabling ORM and core SQLAlchemy functionality with Impala and Hive backends.
class ImpalaDialect:
"""SQLAlchemy dialect for Impala."""
class Impala4Dialect:
"""SQLAlchemy dialect for Impala 4.x."""

# Module-level constants
apilevel = '2.0'
threadsafety = 1
paramstyle = 'pyformat'
# Type objects for DB API 2.0 compliance
STRING: _DBAPITypeObject # Matches 'STRING'
BINARY: _DBAPITypeObject # Matches 'BINARY'
NUMBER: _DBAPITypeObject # Matches 'BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE', 'DECIMAL'
DATETIME: _DBAPITypeObject # Matches 'TIMESTAMP'
DATE: _DBAPITypeObject # Matches 'DATE'
ROWID: _DBAPITypeObject # Empty values set
# Date/time constructors
def Date(year, month, day): ...
def Time(hour, minute, second): ...
def Timestamp(year, month, day, hour, minute, second): ...
def DateFromTicks(ticks): ...
def TimeFromTicks(ticks): ...
def TimestampFromTicks(ticks): ...
# Binary data constructor
def Binary(data): ...