CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-chdb

chDB is an in-process SQL OLAP Engine powered by ClickHouse

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

sessions.mddocs/

Stateful Sessions

Persistent database sessions that maintain state across queries, enabling DDL operations, temporary tables, views, and complex multi-query workflows. Sessions provide database-like persistence either in memory or on disk.

Capabilities

Session Management

Create and manage stateful database sessions with automatic cleanup.

class Session:
    def __init__(self, path: str = None):
        """
        Initialize a new database session.
        
        Parameters:
        - path: Optional directory path for persistent storage. If None, creates temporary directory that is auto-cleaned.
        
        Notes:
        - Default database is "_local" with Memory engine
        - Data stored in memory unless different database/engine specified
        - Temporary sessions are cleaned up automatically on deletion
        - Persistent sessions (with path) are not auto-cleaned
        """
    
    def query(self, sql: str, fmt: str = "CSV", udf_path: str = ""):
        """
        Execute SQL query within session context.
        
        Parameters:
        - sql: SQL query string to execute
        - fmt: Output format ("CSV", "JSON", "DataFrame", "Pretty", etc.)
        - udf_path: Optional path to user-defined function configurations
        
        Returns:
        Query result in specified format
        
        Raises:
        ChdbError: If query execution fails
        """
    
    def sql(self, sql: str, fmt: str = "CSV", udf_path: str = ""):
        """Alias for query() method with identical functionality."""
    
    def cleanup(self):
        """Manually clean up session resources and temporary files."""

Context Manager Support

Sessions support Python context manager protocol for automatic resource cleanup.

class Session:
    def __enter__(self):
        """Enter context manager, returns self."""
    
    def __exit__(self, exc_type, exc_value, traceback):
        """Exit context manager, calls cleanup()."""
    
    def __del__(self):
        """Destructor that calls cleanup() for temporary sessions."""

Usage Examples

Basic Session Usage

from chdb import session

# Create temporary session (auto-cleanup)
sess = session.Session()

# Execute queries with persistent state
sess.query("CREATE DATABASE test_db ENGINE = Atomic")
sess.query("CREATE TABLE test_db.users (id Int32, name String) ENGINE = Memory")
sess.query("INSERT INTO test_db.users VALUES (1, 'Alice'), (2, 'Bob')")

# Query the data
result = sess.query("SELECT * FROM test_db.users", "Pretty")
print(result)

# Session is automatically cleaned up when sess goes out of scope

Context Manager Usage

from chdb import session

# Using session as context manager for automatic cleanup
with session.Session() as sess:
    # Create database objects
    sess.query("CREATE DATABASE IF NOT EXISTS analytics ENGINE = Atomic")
    
    # Create table with specific engine
    sess.query('''
        CREATE TABLE IF NOT EXISTS analytics.sales (
            date Date,
            product_id Int32,
            quantity Int32,
            price Float64
        ) ENGINE = MergeTree()
        ORDER BY (date, product_id)
    ''')
    
    # Insert sample data
    sess.query('''
        INSERT INTO analytics.sales VALUES
        ('2024-01-01', 1, 100, 9.99),
        ('2024-01-01', 2, 50, 19.99),
        ('2024-01-02', 1, 75, 9.99),
        ('2024-01-02', 2, 80, 19.99)
    ''')
    
    # Complex analytical query
    report = sess.query('''
        SELECT 
            date,
            SUM(quantity) as total_quantity,
            SUM(quantity * price) as total_revenue,
            AVG(price) as avg_price
        FROM analytics.sales
        GROUP BY date
        ORDER BY date
    ''', "DataFrame")
    
    print(report)
# Session automatically cleaned up here

Persistent Session Storage

from chdb import session
import os

# Create session with persistent storage
db_path = "/tmp/my_persistent_db"
sess = session.Session(path=db_path)

# Create persistent data structures
sess.query("CREATE DATABASE IF NOT EXISTS warehouse ENGINE = Atomic")
sess.query('''
    CREATE TABLE IF NOT EXISTS warehouse.inventory (
        item_id Int32,
        item_name String,
        quantity Int32,
        last_updated DateTime
    ) ENGINE = MergeTree()
    ORDER BY item_id
''')

# Insert data
sess.query('''
    INSERT INTO warehouse.inventory VALUES
    (1, 'Widget A', 100, now()),
    (2, 'Widget B', 250, now()),
    (3, 'Widget C', 75, now())
''')

print("Data inserted into persistent storage")

# Close session but keep data
sess.cleanup()

# Later: Reconnect to same persistent storage
sess2 = session.Session(path=db_path)
inventory = sess2.query("SELECT * FROM warehouse.inventory", "DataFrame")
print("Retrieved from persistent storage:")
print(inventory)

# Manual cleanup when done with persistent data
sess2.cleanup()
# Note: Database files remain at db_path

Working with Views and Complex Queries

from chdb import session

with session.Session() as sess:
    # Create database and base tables
    sess.query("CREATE DATABASE reporting ENGINE = Atomic")
    
    sess.query('''
        CREATE TABLE reporting.orders (
            order_id Int32,
            customer_id Int32,
            order_date Date,
            amount Float64
        ) ENGINE = Memory
    ''')
    
    sess.query('''
        CREATE TABLE reporting.customers (
            customer_id Int32,
            customer_name String,
            region String
        ) ENGINE = Memory
    ''')
    
    # Insert sample data
    sess.query('''
        INSERT INTO reporting.orders VALUES
        (1, 101, '2024-01-15', 250.00),
        (2, 102, '2024-01-16', 125.50),
        (3, 101, '2024-01-17', 89.99),
        (4, 103, '2024-01-18', 450.00)
    ''')
    
    sess.query('''
        INSERT INTO reporting.customers VALUES
        (101, 'Acme Corp', 'North'),
        (102, 'Tech Solutions', 'South'),
        (103, 'Global Industries', 'East')
    ''')
    
    # Create view for easy reporting
    sess.query('''
        CREATE VIEW reporting.customer_summary AS
        SELECT 
            c.customer_name,
            c.region,
            COUNT(o.order_id) as order_count,
            SUM(o.amount) as total_spent,
            AVG(o.amount) as avg_order_value
        FROM reporting.customers c
        LEFT JOIN reporting.orders o ON c.customer_id = o.customer_id
        GROUP BY c.customer_id, c.customer_name, c.region
        ORDER BY total_spent DESC
    ''')
    
    # Query the view
    summary = sess.query("SELECT * FROM reporting.customer_summary", "Pretty")
    print("Customer Summary Report:")
    print(summary)
    
    # Use view in more complex queries
    top_customers = sess.query('''
        SELECT customer_name, total_spent
        FROM reporting.customer_summary
        WHERE total_spent > 200
    ''', "JSON")
    
    print("Top customers (>$200):")
    print(top_customers)

DataFrame Output with Sessions

from chdb import session
import pandas as pd

with session.Session() as sess:
    # Create and populate table
    sess.query("CREATE TABLE sales (date Date, amount Float64) ENGINE = Memory")
    sess.query('''
        INSERT INTO sales VALUES
        ('2024-01-01', 100.0),
        ('2024-01-02', 150.0),
        ('2024-01-03', 200.0)
    ''')
    
    # Get results as DataFrame
    df = sess.query("SELECT * FROM sales ORDER BY date", "DataFrame")
    
    # Work with pandas DataFrame
    df['cumulative'] = df['amount'].cumsum()
    print("Sales with cumulative total:")
    print(df)
    
    # Use DataFrame results in subsequent queries
    total = sess.query("SELECT SUM(amount) as total FROM sales", "JSON")
    print(f"Total sales: {total}")

Error Handling with Sessions

from chdb import session, ChdbError

try:
    with session.Session() as sess:
        # This will work
        sess.query("CREATE TABLE test (id Int32) ENGINE = Memory")
        
        # This might fail if table already exists without IF NOT EXISTS
        sess.query("CREATE TABLE test (id Int32) ENGINE = Memory")
        
except ChdbError as e:
    print(f"Session query failed: {e}")

Install with Tessl CLI

npx tessl i tessl/pypi-chdb

docs

dataframe.md

dbapi.md

index.md

query-functions.md

sessions.md

udf.md

utils.md

tile.json