Blazingly fast DataFrame library for Python with lazy and eager evaluation modes
—
SQL query execution capabilities with SQLContext for managing multiple DataFrames and native SQL expression support within DataFrame operations for familiar SQL-based data manipulation.
Context manager for executing SQL queries across multiple DataFrames with table registration and management.
class SQLContext:
def __init__(self):
"""Create new SQL context."""
def register(self, name: str, frame) -> None:
"""
Register DataFrame or LazyFrame for SQL queries.
Parameters:
- name: Table name for SQL queries
- frame: DataFrame or LazyFrame to register
"""
def execute(self, query: str, *, eager=True) -> DataFrame | LazyFrame:
"""
Execute SQL query.
Parameters:
- query: SQL query string
- eager: Return DataFrame if True, LazyFrame if False
Returns:
Query results as DataFrame or LazyFrame
"""
def tables(self) -> list[str]:
"""
Get list of registered table names.
Returns:
List of registered table names
"""
def unregister(self, name: str) -> None:
"""
Unregister table from context.
Parameters:
- name: Table name to unregister
"""Direct SQL execution functions for single queries and SQL expressions within DataFrame operations.
def sql(query: str, *, eager=True, **kwargs) -> DataFrame | LazyFrame:
"""
Execute SQL query on DataFrames.
Parameters:
- query: SQL query string
- eager: Return DataFrame if True, LazyFrame if False
- kwargs: DataFrames accessible by name in SQL
Returns:
Query results as DataFrame or LazyFrame
"""
def sql_expr(sql: str) -> Expr:
"""
Create expression from SQL string.
Parameters:
- sql: SQL expression string
Returns:
Expression object
"""import polars as pl
# Create sample data
df1 = pl.DataFrame({
"id": [1, 2, 3],
"name": ["Alice", "Bob", "Charlie"],
"age": [25, 30, 35]
})
df2 = pl.DataFrame({
"id": [1, 2, 3],
"department": ["Engineering", "Sales", "Marketing"],
"salary": [70000, 50000, 60000]
})
# Direct SQL execution
result = pl.sql(
"SELECT * FROM df1 WHERE age > 28",
df1=df1
)
# Join tables with SQL
result = pl.sql("""
SELECT u.name, u.age, d.department, d.salary
FROM df1 u
JOIN df2 d ON u.id = d.id
WHERE u.age > 25
""", df1=df1, df2=df2)# Create SQL context
ctx = pl.SQLContext()
# Register tables
ctx.register("users", df1)
ctx.register("departments", df2)
# Execute queries
result = ctx.execute("""
SELECT
u.name,
u.age,
d.department,
d.salary,
CASE
WHEN d.salary > 60000 THEN 'High'
WHEN d.salary > 50000 THEN 'Medium'
ELSE 'Low'
END as salary_tier
FROM users u
JOIN departments d ON u.id = d.id
ORDER BY d.salary DESC
""")
# Check registered tables
print(ctx.tables()) # ['users', 'departments']# Use SQL expressions within DataFrame operations
df = pl.DataFrame({
"x": [1, 2, 3, 4, 5],
"y": [10, 20, 30, 40, 50]
})
result = df.with_columns([
pl.sql_expr("x * 2 + y").alias("calculated"),
pl.sql_expr("CASE WHEN x > 3 THEN 'high' ELSE 'low' END").alias("category")
])Install with Tessl CLI
npx tessl i tessl/pypi-polars