Python driver with native interface for ClickHouse database providing high-performance connectivity and comprehensive data type support.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
The Client class provides direct access to ClickHouse's native protocol features with full control over query execution, data processing, and connection management. This interface offers the most comprehensive feature set including streaming results, progress tracking, external tables, and advanced query settings.
Create a Client instance with connection parameters and optional client settings for customized behavior.
class Client:
def __init__(self, host='localhost', port=9000, database='', user='default',
password='', client_name='python-driver', connect_timeout=10,
send_receive_timeout=300, sync_request_timeout=5,
compress_block_size=1048576, compression=False, secure=False,
verify=True, ssl_version=None, ca_certs=None, ciphers=None,
keyfile=None, certfile=None, server_hostname=None,
alt_hosts=None, settings_is_important=False, tcp_keepalive=False,
client_revision=None, settings=None, **kwargs):
"""
Initialize ClickHouse client.
Parameters:
- host: ClickHouse server hostname
- port: ClickHouse server port (9000 for native, 9440 for secure)
- database: Default database name
- user: Username for authentication
- password: Password for authentication
- client_name: Client identifier sent to server
- connect_timeout: Connection timeout in seconds
- send_receive_timeout: Socket send/receive timeout
- sync_request_timeout: Synchronous request timeout
- compress_block_size: Compression block size in bytes
- compression: Enable compression (requires algorithm extras)
- secure: Enable SSL/TLS connection
- verify: Verify SSL certificates
- ssl_version: SSL protocol version (None for default)
- ca_certs: Path to CA certificates file
- ciphers: SSL cipher suite specification
- keyfile: Path to client private key file for SSL authentication
- certfile: Path to client certificate file for SSL authentication
- server_hostname: Expected server hostname for SSL verification
- alt_hosts: Alternative hosts for failover (comma-separated)
- settings_is_important: Whether settings are important for queries
- tcp_keepalive: Enable TCP keepalive for connections
- client_revision: ClickHouse client revision number
- settings: Default query settings dictionary
"""Execute queries with support for parameters, column type information, and various result formats.
def execute(self, query, params=None, with_column_types=False,
external_tables=None, query_id=None, settings=None,
types_check=False, columnar=False):
"""
Execute query and return results.
Parameters:
- query: SQL query string
- params: Query parameters dictionary for %(name)s substitution
- with_column_types: Return column names and types with results
- external_tables: List of external table definitions
- query_id: Unique query identifier for tracking
- settings: Query-specific ClickHouse settings
- types_check: Enable strict type checking for parameters
- columnar: Return results in columnar format
Returns:
- List of result rows (tuples), or
- Tuple of (column_info, rows) if with_column_types=True
"""Execute queries with streaming results for memory-efficient processing of large datasets.
def execute_iter(self, query, params=None, with_column_types=False,
external_tables=None, query_id=None, settings=None,
types_check=False):
"""
Execute query and return streaming iterator.
Parameters: Same as execute()
Returns:
- IterQueryResult: Iterator yielding result blocks
Yields:
- Blocks of rows for memory-efficient processing
"""Execute queries with progress information for long-running operations monitoring.
def execute_with_progress(self, query, params=None, with_column_types=False,
external_tables=None, query_id=None, settings=None,
types_check=False):
"""
Execute query with progress tracking.
Parameters: Same as execute()
Returns:
- ProgressQueryResult: Generator yielding (progress, data) tuples
Yields:
- Tuple of (progress_info, result_block) during execution
"""Query and insert pandas DataFrames for data science workloads (requires pandas installation).
def query_dataframe(self, query, params=None, external_tables=None,
query_id=None, settings=None, replace_nonwords=True):
"""
Execute query and return pandas DataFrame.
New in version 0.2.0.
Parameters:
- query: SQL query string
- params: Query parameters dictionary for %(name)s substitution
- external_tables: List of external table definitions
- query_id: Unique query identifier for tracking
- settings: Query-specific ClickHouse settings
- replace_nonwords: Replace non-word characters in column names with underscores
Returns:
- pandas.DataFrame: Query results as DataFrame with typed columns
New in version 0.2.0.
Requires:
- pandas package installation (pip install pandas)
Raises:
- RuntimeError: If pandas is not installed
"""
def insert_dataframe(self, query, dataframe, external_tables=None,
query_id=None, settings=None):
"""
Insert pandas DataFrame data into ClickHouse table.
New in version 0.2.0.
Parameters:
- query: INSERT query string
- dataframe: pandas DataFrame with data to insert
- external_tables: List of external table definitions
- query_id: Unique query identifier for tracking
- settings: Query-specific ClickHouse settings
Returns:
- int: Number of inserted rows
Requires:
- pandas package installation (pip install pandas)
Raises:
- RuntimeError: If pandas is not installed
- ValueError: If DataFrame is missing required columns
"""Manage client connection lifecycle and query cancellation.
def disconnect(self):
"""Disconnect from ClickHouse server."""
def cancel(self, with_column_types=False):
"""
Cancel currently executing query.
Parameters:
- with_column_types: Include column information in response
Returns:
- Cancel operation result
"""Create clients from connection URLs for convenient configuration.
@classmethod
def from_url(cls, url):
"""
Create client from connection URL.
Parameters:
- url: Connection URL (clickhouse://user:password@host:port/database?param=value)
Returns:
- Client: Configured client instance
URL Format:
- clickhouse://[user[:password]@]host[:port][/database][?param1=value1¶m2=value2]
- clickhouses:// for SSL connections
"""Advanced parameter handling for complex query construction.
def substitute_params(self, query, params, context):
"""
Substitute parameters in query string.
Parameters:
- query: SQL query with %(name)s placeholders
- params: Parameters dictionary
- context: Execution context information
Returns:
- str: Query with substituted parameters
"""Use client as context manager for automatic connection cleanup.
def __enter__(self):
"""Enter context manager."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Exit context manager and disconnect."""
self.disconnect()Configure client behavior through settings dictionary passed to constructor or individual queries:
# Client-specific settings (passed in settings parameter)
settings = {
'insert_block_size': 1048576, # Chunk size for INSERT operations
'strings_as_bytes': False, # Disable string encoding/decoding
'strings_encoding': 'utf-8', # String encoding (default: UTF-8)
'use_numpy': False, # Enable NumPy for column processing
'input_format_null_as_default': False, # Initialize null fields with defaults
'namedtuple_as_json': False, # Named tuple JSON deserialization
'server_side_params': False, # Server-side parameter rendering
}# OpenTelemetry integration
settings = {
'opentelemetry_traceparent': 'trace_header_value', # W3C trace context parent
'opentelemetry_tracestate': 'trace_state_value', # W3C trace context state
'quota_key': 'user_quota_key', # Quota differentiation
}# High availability settings
settings = {
'round_robin': True, # Round-robin host selection for alt_hosts
}from clickhouse_driver import Client
client = Client('localhost')
# Simple query
result = client.execute('SELECT 1')
print(result) # [(1,)]
# Query with parameters
result = client.execute(
'SELECT * FROM users WHERE age > %(min_age)s',
{'min_age': 18}
)
# Query with column information
columns, rows = client.execute(
'SELECT name, age FROM users LIMIT 5',
with_column_types=True
)
print(columns) # [('name', 'String'), ('age', 'UInt8')]# Process large result sets efficiently
for block in client.execute_iter('SELECT * FROM large_table'):
for row in block:
process_row(row)# Monitor long-running queries
query = 'SELECT count() FROM huge_table GROUP BY category'
for progress, block in client.execute_with_progress(query):
if progress:
print(f"Processed: {progress.rows} rows")
if block:
process_results(block)# Use external data in queries
external_data = [
('id', [1, 2, 3]),
('name', ['Alice', 'Bob', 'Charlie'])
]
external_table = {
'name': 'temp_users',
'structure': [('id', 'UInt32'), ('name', 'String')],
'data': external_data
}
result = client.execute(
'SELECT * FROM temp_users WHERE id IN (SELECT id FROM main_users)',
external_tables=[external_table]
)# Connect using URL
client = Client.from_url('clickhouse://user:pass@server:9000/mydb?secure=1')
# SSL connection
client = Client.from_url('clickhouses://user:pass@server:9440/mydb')# Automatic connection cleanup
with Client('localhost') as client:
result = client.execute('SELECT version()')
print(result)
# Connection automatically closedInstall with Tessl CLI
npx tessl i tessl/pypi-clickhouse-driver