The portable Python dataframe library that provides a unified API for data analysis across 20+ different backends
—
Bidirectional SQL integration allowing parsing SQL into expressions and compiling expressions to SQL with backend-specific optimizations.
Parse SQL queries into Ibis expressions.
def parse_sql(sql, dialect=None):
"""
Parse SQL string into Ibis expression.
Parameters:
- sql: str, SQL query string
- dialect: str, optional SQL dialect ('postgres', 'mysql', 'bigquery', etc.)
Returns:
Table expression representing the SQL query
"""Usage Examples:
import ibis
# Parse simple SQL
sql = "SELECT name, age FROM employees WHERE age > 25"
expr = ibis.parse_sql(sql)
# Parse with dialect
bigquery_sql = """
SELECT
customer_id,
EXTRACT(YEAR FROM order_date) as year,
SUM(amount) as total_amount
FROM `project.dataset.orders`
WHERE order_date >= '2023-01-01'
GROUP BY customer_id, year
ORDER BY total_amount DESC
"""
expr = ibis.parse_sql(bigquery_sql, dialect='bigquery')
# Complex SQL with window functions
window_sql = """
SELECT
employee_id,
salary,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) as dept_rank,
LAG(salary, 1) OVER (ORDER BY hire_date) as prev_salary
FROM employees
"""
expr = ibis.parse_sql(window_sql)Compile Ibis expressions to SQL for specific backends.
def to_sql(expr, dialect=None):
"""
Compile expression to SQL string.
Parameters:
- expr: Ibis expression to compile
- dialect: str, target SQL dialect
Returns:
SQL query string
"""Usage Examples:
# Create expression
table = ibis.table({'name': 'string', 'age': 'int64', 'salary': 'float64'})
expr = (
table
.filter(table.age > 25)
.group_by('name')
.aggregate(avg_salary=table.salary.mean())
.order_by('avg_salary')
)
# Compile to different dialects
postgres_sql = ibis.to_sql(expr, dialect='postgres')
bigquery_sql = ibis.to_sql(expr, dialect='bigquery')
mysql_sql = ibis.to_sql(expr, dialect='mysql')
print(postgres_sql)
# SELECT "name", AVG("salary") AS "avg_salary"
# FROM "table"
# WHERE "age" > 25
# GROUP BY "name"
# ORDER BY "avg_salary"Use backend connections to compile expressions to their native SQL dialect.
backend.compile(expr):
"""
Compile expression to backend-specific SQL.
Parameters:
- expr: Ibis expression
Returns:
SQL string optimized for the backend
"""Usage Examples:
# Backend-specific compilation
duckdb_con = ibis.duckdb.connect()
pg_con = ibis.postgres.connect(...)
# Same expression, different SQL output
expr = table.filter(table.value > 100).select('*')
duckdb_sql = duckdb_con.compile(expr)
postgres_sql = pg_con.compile(expr)
# DuckDB might use different syntax or optimizations than PostgreSQL
print("DuckDB:", duckdb_sql)
print("PostgreSQL:", postgres_sql)Execute raw SQL directly on backend connections.
backend.raw_sql(query):
"""
Execute raw SQL query.
Parameters:
- query: str, SQL query to execute
Returns:
Query results (backend-specific format)
"""Usage Examples:
# Execute raw SQL
con = ibis.postgres.connect(...)
# Simple query
result = con.raw_sql("SELECT version()")
# Complex administrative query
result = con.raw_sql("""
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
""")Convert expressions back to Python code.
def decompile(expr):
"""
Decompile expression to Python code string.
Parameters:
- expr: Ibis expression to decompile
Returns:
Python code string that recreates the expression
"""Usage Examples:
# Create complex expression
table = ibis.table({'x': 'int64', 'y': 'string', 'z': 'float64'})
expr = (
table
.filter(table.x > 10)
.mutate(
x_squared=table.x ** 2,
y_upper=table.y.upper()
)
.group_by('y_upper')
.aggregate(
count=table.count(),
avg_x=table.x.mean(),
sum_z=table.z.sum()
)
)
# Decompile to Python code
python_code = ibis.decompile(expr)
print(python_code)
# Output: Python code that recreates the expressionUse Ibis expressions within SQL templates.
Usage Examples:
# SQL with Ibis expression components
base_filter = table.date_col >= '2023-01-01'
aggregation = table.sales.sum()
# Combine in SQL template
sql_template = f"""
WITH filtered_data AS (
{ibis.to_sql(table.filter(base_filter))}
),
aggregated AS (
SELECT
category,
{ibis.to_sql(aggregation)} as total_sales
FROM filtered_data
GROUP BY category
)
SELECT * FROM aggregated
ORDER BY total_sales DESC
"""
result = con.raw_sql(sql_template)Translate SQL between different backend dialects.
Usage Examples:
# Parse SQL from one dialect
mysql_sql = """
SELECT
DATE_FORMAT(created_at, '%Y-%m') as month,
COUNT(*) as orders
FROM orders
WHERE created_at >= '2023-01-01'
GROUP BY month
ORDER BY month
"""
# Parse and translate
expr = ibis.parse_sql(mysql_sql, dialect='mysql')
# Compile for different backends
postgres_sql = ibis.to_sql(expr, dialect='postgres')
bigquery_sql = ibis.to_sql(expr, dialect='bigquery')
print("PostgreSQL:", postgres_sql)
print("BigQuery:", bigquery_sql)Use backend-specific SQL functions in expressions.
Usage Examples:
# Backend-specific functions
pg_con = ibis.postgres.connect(...)
# PostgreSQL-specific functions via raw SQL
pg_expr = pg_con.sql("""
SELECT generate_series(1, 10) as numbers
""")
# Combine with Ibis operations
result = (
pg_expr
.mutate(
squared=pg_expr.numbers ** 2,
is_even=(pg_expr.numbers % 2) == 0
)
.filter(pg_expr.numbers > 5)
)
# Compile back to SQL
final_sql = pg_con.compile(result)Add backend-specific optimization hints.
Usage Examples:
# Expression with optimization context
expr = (
table
.filter(table.date_col >= '2023-01-01')
.group_by('category')
.aggregate(total=table.amount.sum())
)
# Backend-specific optimizations
bigquery_con = ibis.bigquery.connect(...)
# BigQuery-specific SQL with hints
optimized_sql = bigquery_con.compile(expr)
# Backend may add optimizations like partitioning hintsTools for understanding generated SQL.
Usage Examples:
# Debug expression compilation
expr = table.join(other_table, table.id == other_table.ref_id)
# Get SQL with formatting
sql = ibis.to_sql(expr, dialect='postgres')
print("Generated SQL:")
print(sql)
# Analyze query plan (backend-specific)
con = ibis.postgres.connect(...)
plan = con.raw_sql(f"EXPLAIN ANALYZE {sql}")
print("Query Plan:")
print(plan)
# Get expression tree
print("Expression Structure:")
print(repr(expr))Handle SQL dialect differences transparently.
Usage Examples:
# Write dialect-agnostic expressions
expr = (
table
.mutate(
# String concatenation (varies by backend)
full_name=table.first_name + ' ' + table.last_name,
# Date extraction (varies by backend)
year=table.date_col.year(),
# Conditional logic (standardized)
status=ibis.case()
.when(table.age >= 18, 'adult')
.else_('minor')
)
)
# Automatically generates correct SQL for each backend
postgres_sql = ibis.to_sql(expr, dialect='postgres')
mysql_sql = ibis.to_sql(expr, dialect='mysql')
sqlite_sql = ibis.to_sql(expr, dialect='sqlite')
# Each uses backend-appropriate syntax for string concat, date functions, etc.Install with Tessl CLI
npx tessl i tessl/pypi-ibis-framework