chDB is an in-process SQL OLAP Engine powered by ClickHouse
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Persistent database sessions that maintain state across queries, enabling DDL operations, temporary tables, views, and complex multi-query workflows. Sessions provide database-like persistence either in memory or on disk.
Create and manage stateful database sessions with automatic cleanup.
class Session:
def __init__(self, path: str = None):
"""
Initialize a new database session.
Parameters:
- path: Optional directory path for persistent storage. If None, creates temporary directory that is auto-cleaned.
Notes:
- Default database is "_local" with Memory engine
- Data stored in memory unless different database/engine specified
- Temporary sessions are cleaned up automatically on deletion
- Persistent sessions (with path) are not auto-cleaned
"""
def query(self, sql: str, fmt: str = "CSV", udf_path: str = ""):
"""
Execute SQL query within session context.
Parameters:
- sql: SQL query string to execute
- fmt: Output format ("CSV", "JSON", "DataFrame", "Pretty", etc.)
- udf_path: Optional path to user-defined function configurations
Returns:
Query result in specified format
Raises:
ChdbError: If query execution fails
"""
def sql(self, sql: str, fmt: str = "CSV", udf_path: str = ""):
"""Alias for query() method with identical functionality."""
def cleanup(self):
"""Manually clean up session resources and temporary files."""Sessions support Python context manager protocol for automatic resource cleanup.
class Session:
def __enter__(self):
"""Enter context manager, returns self."""
def __exit__(self, exc_type, exc_value, traceback):
"""Exit context manager, calls cleanup()."""
def __del__(self):
"""Destructor that calls cleanup() for temporary sessions."""from chdb import session
# Create temporary session (auto-cleanup)
sess = session.Session()
# Execute queries with persistent state
sess.query("CREATE DATABASE test_db ENGINE = Atomic")
sess.query("CREATE TABLE test_db.users (id Int32, name String) ENGINE = Memory")
sess.query("INSERT INTO test_db.users VALUES (1, 'Alice'), (2, 'Bob')")
# Query the data
result = sess.query("SELECT * FROM test_db.users", "Pretty")
print(result)
# Session is automatically cleaned up when sess goes out of scopefrom chdb import session
# Using session as context manager for automatic cleanup
with session.Session() as sess:
# Create database objects
sess.query("CREATE DATABASE IF NOT EXISTS analytics ENGINE = Atomic")
# Create table with specific engine
sess.query('''
CREATE TABLE IF NOT EXISTS analytics.sales (
date Date,
product_id Int32,
quantity Int32,
price Float64
) ENGINE = MergeTree()
ORDER BY (date, product_id)
''')
# Insert sample data
sess.query('''
INSERT INTO analytics.sales VALUES
('2024-01-01', 1, 100, 9.99),
('2024-01-01', 2, 50, 19.99),
('2024-01-02', 1, 75, 9.99),
('2024-01-02', 2, 80, 19.99)
''')
# Complex analytical query
report = sess.query('''
SELECT
date,
SUM(quantity) as total_quantity,
SUM(quantity * price) as total_revenue,
AVG(price) as avg_price
FROM analytics.sales
GROUP BY date
ORDER BY date
''', "DataFrame")
print(report)
# Session automatically cleaned up herefrom chdb import session
import os
# Create session with persistent storage
db_path = "/tmp/my_persistent_db"
sess = session.Session(path=db_path)
# Create persistent data structures
sess.query("CREATE DATABASE IF NOT EXISTS warehouse ENGINE = Atomic")
sess.query('''
CREATE TABLE IF NOT EXISTS warehouse.inventory (
item_id Int32,
item_name String,
quantity Int32,
last_updated DateTime
) ENGINE = MergeTree()
ORDER BY item_id
''')
# Insert data
sess.query('''
INSERT INTO warehouse.inventory VALUES
(1, 'Widget A', 100, now()),
(2, 'Widget B', 250, now()),
(3, 'Widget C', 75, now())
''')
print("Data inserted into persistent storage")
# Close session but keep data
sess.cleanup()
# Later: Reconnect to same persistent storage
sess2 = session.Session(path=db_path)
inventory = sess2.query("SELECT * FROM warehouse.inventory", "DataFrame")
print("Retrieved from persistent storage:")
print(inventory)
# Manual cleanup when done with persistent data
sess2.cleanup()
# Note: Database files remain at db_pathfrom chdb import session
with session.Session() as sess:
# Create database and base tables
sess.query("CREATE DATABASE reporting ENGINE = Atomic")
sess.query('''
CREATE TABLE reporting.orders (
order_id Int32,
customer_id Int32,
order_date Date,
amount Float64
) ENGINE = Memory
''')
sess.query('''
CREATE TABLE reporting.customers (
customer_id Int32,
customer_name String,
region String
) ENGINE = Memory
''')
# Insert sample data
sess.query('''
INSERT INTO reporting.orders VALUES
(1, 101, '2024-01-15', 250.00),
(2, 102, '2024-01-16', 125.50),
(3, 101, '2024-01-17', 89.99),
(4, 103, '2024-01-18', 450.00)
''')
sess.query('''
INSERT INTO reporting.customers VALUES
(101, 'Acme Corp', 'North'),
(102, 'Tech Solutions', 'South'),
(103, 'Global Industries', 'East')
''')
# Create view for easy reporting
sess.query('''
CREATE VIEW reporting.customer_summary AS
SELECT
c.customer_name,
c.region,
COUNT(o.order_id) as order_count,
SUM(o.amount) as total_spent,
AVG(o.amount) as avg_order_value
FROM reporting.customers c
LEFT JOIN reporting.orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name, c.region
ORDER BY total_spent DESC
''')
# Query the view
summary = sess.query("SELECT * FROM reporting.customer_summary", "Pretty")
print("Customer Summary Report:")
print(summary)
# Use view in more complex queries
top_customers = sess.query('''
SELECT customer_name, total_spent
FROM reporting.customer_summary
WHERE total_spent > 200
''', "JSON")
print("Top customers (>$200):")
print(top_customers)from chdb import session
import pandas as pd
with session.Session() as sess:
# Create and populate table
sess.query("CREATE TABLE sales (date Date, amount Float64) ENGINE = Memory")
sess.query('''
INSERT INTO sales VALUES
('2024-01-01', 100.0),
('2024-01-02', 150.0),
('2024-01-03', 200.0)
''')
# Get results as DataFrame
df = sess.query("SELECT * FROM sales ORDER BY date", "DataFrame")
# Work with pandas DataFrame
df['cumulative'] = df['amount'].cumsum()
print("Sales with cumulative total:")
print(df)
# Use DataFrame results in subsequent queries
total = sess.query("SELECT SUM(amount) as total FROM sales", "JSON")
print(f"Total sales: {total}")from chdb import session, ChdbError
try:
with session.Session() as sess:
# This will work
sess.query("CREATE TABLE test (id Int32) ENGINE = Memory")
# This might fail if table already exists without IF NOT EXISTS
sess.query("CREATE TABLE test (id Int32) ENGINE = Memory")
except ChdbError as e:
print(f"Session query failed: {e}")Install with Tessl CLI
npx tessl i tessl/pypi-chdb