ClickHouse Database Core Driver for Python, Pandas, and Superset
npx @tessl/cli install tessl/pypi-clickhouse-connect@0.8.0A high-performance Python database driver for connecting ClickHouse databases to Python applications, with specialized support for data science workflows through Pandas DataFrames, NumPy arrays, and PyArrow tables. Features comprehensive integration with Apache Superset for business intelligence and includes a minimal SQLAlchemy implementation for compatibility with SQL-based tools and frameworks.
pip install clickhouse-connectpip install clickhouse-connect[sqlalchemy] for SQLAlchemy supportpip install clickhouse-connect[numpy] for NumPy supportpip install clickhouse-connect[pandas] for Pandas supportpip install clickhouse-connect[arrow] for PyArrow supportpip install clickhouse-connect[orjson] for faster JSON processingimport clickhouse_connectStandard client creation:
from clickhouse_connect import create_client, create_async_clientLegacy aliases (deprecated):
from clickhouse_connect import get_client, get_async_clientimport clickhouse_connect
# Create a client
client = clickhouse_connect.create_client(
host='localhost',
port=8123,
username='default',
password='',
database='default'
)
# Execute a simple query
result = client.query('SELECT version()')
print(result.result_set[0][0])
# Query with parameters
result = client.query(
'SELECT * FROM system.tables WHERE database = {db:String}',
parameters={'db': 'system'}
)
# Iterate through results
for row in result.result_set:
print(row)
# Insert data
data = [
['John', 25, 'Engineer'],
['Jane', 30, 'Manager'],
['Bob', 35, 'Developer']
]
client.insert(
'users',
data,
column_names=['name', 'age', 'position']
)
# Work with pandas DataFrames (requires pandas)
import pandas as pd
df = pd.DataFrame({
'name': ['Alice', 'Bob'],
'age': [25, 30],
'city': ['NYC', 'LA']
})
# Insert DataFrame
client.insert_df('users_df', df)
# Query to DataFrame
df_result = client.query_df('SELECT * FROM users_df')
print(df_result)
# Close the connection
client.close()ClickHouse Connect provides multiple API layers for different use cases:
The driver utilizes ClickHouse's HTTP interface for maximum compatibility and offers both synchronous and asynchronous operation modes, built-in support for various data compression formats (zstandard, lz4), and optional Cython-compiled extensions for enhanced performance.
Factory functions for creating synchronous and asynchronous client instances with comprehensive connection configuration options including authentication, security, compression, and connection pooling.
def create_client(
host: str | None = None,
username: str | None = None,
password: str = '',
access_token: str | None = None,
database: str = '__default__',
interface: str | None = None,
port: int = 0,
secure: bool | str = False,
dsn: str | None = None,
settings: dict[str, Any] | None = None,
generic_args: dict[str, Any] | None = None,
compress: bool | str = False,
query_limit: int = 0,
connect_timeout: int = 10,
send_receive_timeout: int = 300,
client_name: str | None = None,
verify: bool | str = True,
ca_cert: str | None = None,
client_cert: str | None = None,
client_cert_key: str | None = None,
session_id: str | None = None,
pool_mgr: Any | None = None,
http_proxy: str | None = None,
https_proxy: str | None = None,
server_host_name: str | None = None,
apply_server_timezone: str | bool | None = None,
show_clickhouse_errors: bool | None = None,
autogenerate_session_id: bool | None = None,
**kwargs
) -> Client: ...
def create_async_client(
host: str | None = None,
username: str | None = None,
password: str = '',
access_token: str | None = None,
database: str = '__default__',
interface: str | None = None,
port: int = 0,
secure: bool | str = False,
dsn: str | None = None,
settings: dict[str, Any] | None = None,
generic_args: dict[str, Any] | None = None,
executor_threads: int | None = None,
compress: bool | str = False,
query_limit: int = 0,
connect_timeout: int = 10,
send_receive_timeout: int = 300,
client_name: str | None = None,
verify: bool | str = True,
ca_cert: str | None = None,
client_cert: str | None = None,
client_cert_key: str | None = None,
session_id: str | None = None,
pool_mgr: Any | None = None,
http_proxy: str | None = None,
https_proxy: str | None = None,
server_host_name: str | None = None,
apply_server_timezone: str | bool | None = None,
show_clickhouse_errors: bool | None = None,
autogenerate_session_id: bool | None = None,
**kwargs
) -> AsyncClient: ...
# Legacy aliases (deprecated but still available)
get_client = create_client
get_async_client = create_async_clientSeamless integration with scientific Python ecosystem including NumPy arrays, Pandas DataFrames, and PyArrow tables for high-performance data processing and analysis workflows.
def query_df(self, query: str, **kwargs) -> pd.DataFrame: ...
def query_np(self, query: str, **kwargs) -> np.ndarray: ...
def query_arrow(self, query: str, **kwargs) -> pa.Table: ...
def insert_df(self, table: str, df: pd.DataFrame, **kwargs): ...
def insert_arrow(self, table: str, arrow_table: pa.Table, **kwargs): ...Standard Python Database API 2.0 implementation providing Connection and Cursor objects for compatibility with existing database tools and frameworks.
def connect(
host: str | None = None,
database: str | None = None,
username: str | None = '',
password: str | None = '',
port: int | None = None,
**kwargs
) -> Connection: ...
class Connection:
def cursor(self) -> Cursor: ...
def close(self): ...
def commit(self): ...
def rollback(self): ...
class Cursor:
def execute(self, operation: str, parameters=None): ...
def fetchone(self) -> Sequence | None: ...
def fetchmany(self, size: int = None) -> Sequence[Sequence]: ...
def fetchall(self) -> Sequence[Sequence]: ...Complete SQLAlchemy dialect implementation enabling ORM capabilities, query building, and integration with SQL-based tools and frameworks.
class ClickHouseDialect:
name = 'clickhousedb'
# SQLAlchemy dialect implementationComprehensive exception hierarchy providing detailed error information and compatibility with DB-API 2.0 exception model for robust error handling.
class ClickHouseError(Exception): ...
class DatabaseError(Error): ...
class OperationalError(DatabaseError): ...
class ProgrammingError(DatabaseError): ...
class StreamClosedError(ClickHouseError): ...Development and testing utilities including data generation tools, external data support, and configuration management for enhanced developer experience.
def random_col_data(ch_type: str, size: int, nullable: bool = False) -> list: ...
def version() -> str: ...
def get_setting(name: str) -> Any: ...
def set_setting(name: str, value: Any) -> None: ...
class ExternalData: ...
class TableContext: ...Core data structures returned by query and insert operations, providing structured access to results, metadata, and execution summaries.
class QueryResult:
result_set: List[List[Any]]
column_names: Tuple[str, ...]
column_types: Tuple[ClickHouseType, ...]
summary: Dict[str, Any]
def named_results(self) -> List[Dict[str, Any]]: ...
@property
def first_row(self) -> List[Any] | None: ...
@property
def first_item(self) -> Any | None: ...
class QuerySummary:
query_id: str
summary: Dict[str, Any]
def as_query_result(self) -> QueryResult: ...
class StreamContext:
def column_block_stream(self) -> Iterator[List[List[Any]]]: ...
def row_block_stream(self) -> Iterator[List[List[Any]]]: ...
def rows_stream(self) -> Iterator[List[Any]]: ...
def np_stream(self) -> Iterator[np.ndarray]: ...
def df_stream(self) -> Iterator[pd.DataFrame]: ...# Basic connection
client = create_client(host='clickhouse.example.com')
# With authentication
client = create_client(
host='clickhouse.example.com',
username='myuser',
password='mypassword',
database='analytics'
)
# Secure connection
client = create_client(
host='clickhouse.example.com',
secure=True, # Use HTTPS
verify=True, # Verify SSL certificate
ca_cert='/path/to/ca.pem'
)
# With compression
client = create_client(
host='clickhouse.example.com',
compress='lz4' # or 'zstd', 'gzip', 'brotli'
)# Set global settings
client = create_client(
host='clickhouse.example.com',
settings={'max_threads': 4, 'max_memory_usage': '4G'}
)
# Runtime settings
client.set_client_setting('max_block_size', 65536)
setting_value = client.get_client_setting('max_block_size')# Auto-generate session IDs
client = create_client(
host='clickhouse.example.com',
autogenerate_session_id=True
)
# Manual session ID
client = create_client(
host='clickhouse.example.com',
session_id='my-session-123'
)