0
# SQL Execution Engine
1
2
Built-in SQL execution engine for running queries against in-memory data structures. Execute SQL against Python data, perform joins and aggregations, and integrate with pandas DataFrames and other data sources.
3
4
## Capabilities
5
6
### Core Execution
7
8
Execute SQL queries against in-memory tables and data structures.
9
10
```python { .api }
11
def execute(
12
sql: str | Expression,
13
schema: Optional[Schema] = None,
14
read: str = None,
15
dialect: str = None,
16
tables: Optional[Dict] = None
17
) -> Table:
18
"""
19
Execute SQL query against in-memory data.
20
21
Args:
22
sql: SQL statement or Expression to execute
23
schema (Schema): Database schema for type information and validation
24
read (str): SQL dialect for parsing
25
dialect (str): SQL dialect (alias for read)
26
tables (Dict): Additional tables to register for execution
27
28
Returns:
29
Table: Result table with columnar data
30
"""
31
```
32
33
### Table Management
34
35
Create and manage in-memory tables for SQL execution.
36
37
```python { .api }
38
class Table:
39
"""In-memory table representation for SQL execution."""
40
41
def __init__(self, columns: List[str], rows: List[List]):
42
"""
43
Create table with column names and row data.
44
45
Args:
46
columns (List[str]): Column names
47
rows (List[List]): Row data as list of lists
48
"""
49
50
@property
51
def columns(self) -> List[str]:
52
"""Get column names."""
53
54
@property
55
def rows(self) -> List[List]:
56
"""Get row data."""
57
58
def __getitem__(self, index: int) -> List:
59
"""Get row by index."""
60
61
def __len__(self) -> int:
62
"""Get number of rows."""
63
64
def to_dict(self) -> Dict[str, List]:
65
"""Convert table to dictionary format."""
66
67
def to_df(self):
68
"""Convert table to pandas DataFrame (requires pandas)."""
69
70
def ensure_tables(tables: Optional[Dict], dialect: str = None) -> Tables:
71
"""
72
Ensure tables are in proper format for execution.
73
74
Args:
75
tables (Dict): Tables to validate and convert
76
dialect (str): SQL dialect for parsing table definitions
77
78
Returns:
79
Tables: Validated tables container
80
"""
81
```
82
83
### Python Execution Engine
84
85
Core execution engine that runs SQL operations in Python.
86
87
```python { .api }
88
class PythonExecutor:
89
"""Python-based SQL execution engine."""
90
91
def __init__(self, tables: Optional[Tables] = None):
92
"""
93
Initialize executor with optional table registry.
94
95
Args:
96
tables (Tables): Pre-registered tables for execution
97
"""
98
99
def execute(self, plan: Plan) -> Table:
100
"""
101
Execute query plan and return results.
102
103
Args:
104
plan (Plan): Query execution plan
105
106
Returns:
107
Table: Execution results
108
"""
109
```
110
111
### Execution Context and Environment
112
113
Manage execution context and variable environments.
114
115
```python { .api }
116
class Context:
117
"""Execution context with variables and state."""
118
119
def __init__(self, tables: Optional[Tables] = None):
120
"""Initialize context with optional tables."""
121
122
def eval(self, expression: Expression) -> Any:
123
"""Evaluate expression in current context."""
124
125
def set_variable(self, name: str, value: Any) -> None:
126
"""Set variable in context."""
127
128
def get_variable(self, name: str) -> Any:
129
"""Get variable from context."""
130
131
class Environment:
132
"""Execution environment managing scope and variables."""
133
134
def __init__(self, context: Optional[Context] = None):
135
"""Initialize environment with optional context."""
136
```
137
138
## Usage Examples
139
140
### Basic SQL Execution
141
142
```python
143
import sqlglot
144
from sqlglot.executor import execute, Table
145
146
# Create sample data
147
users_data = Table(
148
columns=["id", "name", "age", "email"],
149
rows=[
150
[1, "Alice", 25, "alice@example.com"],
151
[2, "Bob", 30, "bob@example.com"],
152
[3, "Charlie", 35, "charlie@example.com"]
153
]
154
)
155
156
orders_data = Table(
157
columns=["id", "user_id", "amount", "date"],
158
rows=[
159
[1, 1, 100.50, "2023-01-15"],
160
[2, 2, 250.00, "2023-01-16"],
161
[3, 1, 75.25, "2023-01-17"],
162
[4, 3, 500.00, "2023-01-18"]
163
]
164
)
165
166
# Execute SQL query
167
tables = {
168
"users": users_data,
169
"orders": orders_data
170
}
171
172
result = execute(
173
"SELECT name, COUNT(*) as order_count FROM users JOIN orders ON users.id = orders.user_id GROUP BY name",
174
tables=tables
175
)
176
177
print(result.columns) # ['name', 'order_count']
178
print(result.rows) # [['Alice', 2], ['Bob', 1], ['Charlie', 1]]
179
```
180
181
### Working with Pandas DataFrames
182
183
```python
184
import pandas as pd
185
from sqlglot.executor import execute, Table
186
187
# Convert pandas DataFrame to SQLGlot Table
188
df = pd.DataFrame({
189
'product': ['A', 'B', 'C', 'A', 'B'],
190
'sales': [100, 200, 150, 120, 180],
191
'region': ['North', 'South', 'East', 'West', 'North']
192
})
193
194
# Convert to SQLGlot Table format
195
table = Table(
196
columns=list(df.columns),
197
rows=df.values.tolist()
198
)
199
200
# Execute aggregation query
201
result = execute(
202
"SELECT product, SUM(sales) as total_sales FROM products GROUP BY product ORDER BY total_sales DESC",
203
tables={"products": table}
204
)
205
206
# Convert result back to DataFrame
207
result_df = pd.DataFrame(result.to_dict())
208
print(result_df)
209
```
210
211
### Complex Queries with Joins
212
213
```python
214
from sqlglot.executor import execute, Table
215
from sqlglot.schema import MappingSchema
216
217
# Create related tables
218
customers = Table(
219
columns=["customer_id", "name", "city"],
220
rows=[
221
[1, "John Doe", "New York"],
222
[2, "Jane Smith", "Los Angeles"],
223
[3, "Bob Johnson", "Chicago"]
224
]
225
)
226
227
orders = Table(
228
columns=["order_id", "customer_id", "product", "quantity", "price"],
229
rows=[
230
[101, 1, "Widget A", 2, 25.00],
231
[102, 2, "Widget B", 1, 50.00],
232
[103, 1, "Widget C", 3, 15.00],
233
[104, 3, "Widget A", 1, 25.00]
234
]
235
)
236
237
# Define schema for type validation
238
schema = MappingSchema({
239
"customers": {
240
"customer_id": "INT",
241
"name": "VARCHAR",
242
"city": "VARCHAR"
243
},
244
"orders": {
245
"order_id": "INT",
246
"customer_id": "INT",
247
"product": "VARCHAR",
248
"quantity": "INT",
249
"price": "DECIMAL"
250
}
251
})
252
253
# Execute complex analytical query
254
sql = """
255
SELECT
256
c.name,
257
c.city,
258
COUNT(*) as order_count,
259
SUM(o.quantity * o.price) as total_spent,
260
AVG(o.quantity * o.price) as avg_order_value
261
FROM customers c
262
JOIN orders o ON c.customer_id = o.customer_id
263
GROUP BY c.customer_id, c.name, c.city
264
HAVING SUM(o.quantity * o.price) > 50
265
ORDER BY total_spent DESC
266
"""
267
268
result = execute(
269
sql,
270
schema=schema,
271
tables={"customers": customers, "orders": orders}
272
)
273
274
# Display results
275
for row in result.rows:
276
print(dict(zip(result.columns, row)))
277
```
278
279
### Window Functions and Analytics
280
281
```python
282
from sqlglot.executor import execute, Table
283
284
# Sales data for window function analysis
285
sales = Table(
286
columns=["salesperson", "month", "sales", "region"],
287
rows=[
288
["Alice", "2023-01", 1000, "North"],
289
["Alice", "2023-02", 1200, "North"],
290
["Alice", "2023-03", 1100, "North"],
291
["Bob", "2023-01", 800, "South"],
292
["Bob", "2023-02", 900, "South"],
293
["Bob", "2023-03", 1050, "South"],
294
["Charlie", "2023-01", 1300, "East"],
295
["Charlie", "2023-02", 1400, "East"],
296
["Charlie", "2023-03", 1250, "East"]
297
]
298
)
299
300
# Window function query
301
sql = """
302
SELECT
303
salesperson,
304
month,
305
sales,
306
region,
307
SUM(sales) OVER (PARTITION BY salesperson ORDER BY month) as running_total,
308
LAG(sales, 1) OVER (PARTITION BY salesperson ORDER BY month) as prev_month_sales,
309
sales - LAG(sales, 1) OVER (PARTITION BY salesperson ORDER BY month) as month_over_month_change,
310
RANK() OVER (ORDER BY sales DESC) as sales_rank,
311
DENSE_RANK() OVER (PARTITION BY region ORDER BY sales DESC) as region_rank
312
FROM sales_data
313
ORDER BY salesperson, month
314
"""
315
316
result = execute(sql, tables={"sales_data": sales})
317
318
# Print results with headers
319
print("\t".join(result.columns))
320
for row in result.rows:
321
print("\t".join(str(x) for x in row))
322
```
323
324
### Custom Functions and Expressions
325
326
```python
327
from sqlglot.executor import execute, Table
328
from sqlglot import expressions as exp
329
330
# Create table with custom data
331
data = Table(
332
columns=["text_col", "number_col", "date_col"],
333
rows=[
334
["Hello World", 42, "2023-01-15"],
335
["SQLGlot Rocks", 100, "2023-02-20"],
336
["Data Processing", 75, "2023-03-10"]
337
]
338
)
339
340
# Execute queries with various functions
341
queries = [
342
"SELECT UPPER(text_col) as upper_text FROM test_data",
343
"SELECT LENGTH(text_col) as text_length FROM test_data",
344
"SELECT number_col * 2 as doubled FROM test_data",
345
"SELECT SUBSTR(text_col, 1, 5) as first_five FROM test_data"
346
]
347
348
tables = {"test_data": data}
349
350
for sql in queries:
351
result = execute(sql, tables=tables)
352
print(f"Query: {sql}")
353
print(f"Results: {result.rows}")
354
print()
355
```
356
357
## Types
358
359
```python { .api }
360
class Table:
361
"""In-memory table for SQL execution."""
362
363
columns: List[str] # Column names
364
rows: List[List] # Row data
365
366
def __init__(self, columns: List[str], rows: List[List]): ...
367
def __getitem__(self, index: int) -> List: ...
368
def __len__(self) -> int: ...
369
def to_dict(self) -> Dict[str, List]: ...
370
371
class Tables:
372
"""Container for multiple tables with metadata."""
373
374
mapping: Dict[str, Table] # Table name to Table mapping
375
supported_table_args: Any # Supported table arguments
376
377
def __getitem__(self, name: str) -> Table: ...
378
def __contains__(self, name: str) -> bool: ...
379
380
class PythonExecutor:
381
"""Python-based SQL execution engine."""
382
383
tables: Tables # Available tables
384
385
def __init__(self, tables: Optional[Tables] = None): ...
386
def execute(self, plan: Plan) -> Table: ...
387
388
class Context:
389
"""Execution context with variables and state."""
390
391
tables: Optional[Tables] # Available tables
392
variables: Dict[str, Any] # Context variables
393
394
def eval(self, expression: Expression) -> Any: ...
395
def set_variable(self, name: str, value: Any) -> None: ...
396
def get_variable(self, name: str) -> Any: ...
397
398
class Environment:
399
"""Execution environment managing scope."""
400
401
context: Context # Execution context
402
403
def __init__(self, context: Optional[Context] = None): ...
404
```