# Query Operations

SQL query execution with parameters, job configuration, and result processing. BigQuery supports both simple ad hoc queries and complex analytical workloads, with features like result pagination, streaming reads, and integration with data science libraries such as pandas and PyArrow.

## Capabilities
### Query Job Execution

Execute SQL queries asynchronously with comprehensive job monitoring and configuration options.

```python { .api }
class QueryJob:
    def __init__(self, job_id: str, query: str, client: Client): ...

    @property
    def state(self) -> str:
        """Current state of the job ('PENDING', 'RUNNING', 'DONE')."""

    @property
    def query(self) -> str:
        """SQL query being executed."""

    @property
    def job_id(self) -> str:
        """Unique identifier for this job."""

    @property
    def location(self) -> str:
        """Location where the job is running."""

    @property
    def created(self) -> datetime.datetime:
        """Timestamp when the job was created."""

    @property
    def started(self) -> datetime.datetime:
        """Timestamp when the job started running."""

    @property
    def ended(self) -> datetime.datetime:
        """Timestamp when the job completed."""

    @property
    def total_bytes_processed(self) -> int:
        """Total bytes processed by the query."""

    @property
    def total_bytes_billed(self) -> int:
        """Total bytes billed for the query."""

    @property
    def slot_millis(self) -> int:
        """Slot milliseconds consumed by the query."""

    @property
    def num_dml_affected_rows(self) -> int:
        """Number of rows affected by a DML statement."""

    def result(
        self,
        page_size: int = None,
        max_results: int = None,
        retry: google.api_core.retry.Retry = DEFAULT_RETRY,
        timeout: float = None,
        start_index: int = None,
    ) -> google.cloud.bigquery.table.RowIterator:
        """
        Wait for query completion and return results.

        Args:
            page_size: Number of rows per page.
            max_results: Maximum total rows to return.
            retry: Retry configuration for polling.
            timeout: Timeout in seconds for polling.
            start_index: Zero-based index of first row to return.

        Returns:
            RowIterator: Iterator over query results.
        """

    def to_dataframe(
        self,
        create_bqstorage_client: bool = True,
        dtypes: Dict[str, str] = None,
        progress_bar_type: str = None,
        **kwargs
    ) -> pandas.DataFrame:
        """
        Return query results as a pandas DataFrame.

        Args:
            create_bqstorage_client: Use BigQuery Storage API for faster downloads.
            dtypes: Pandas data types for specific columns.
            progress_bar_type: Type of progress bar ('tqdm', None).

        Returns:
            pandas.DataFrame: Query results as DataFrame.
        """

    def to_arrow(
        self,
        create_bqstorage_client: bool = True,
        progress_bar_type: str = None,
    ) -> pyarrow.Table:
        """
        Return query results as a PyArrow Table.

        Args:
            create_bqstorage_client: Use BigQuery Storage API for faster downloads.
            progress_bar_type: Type of progress bar ('tqdm', None).

        Returns:
            pyarrow.Table: Query results as PyArrow Table.
        """

    def cancel(
        self,
        retry: google.api_core.retry.Retry = DEFAULT_RETRY,
        timeout: float = None,
    ) -> bool:
        """
        Cancel the query job.

        Args:
            retry: Retry configuration.
            timeout: Timeout in seconds.

        Returns:
            bool: True if cancellation was successful.
        """
```
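For orientation, here is a minimal sketch of driving a query job end to end: submit, block on `result()`, then read the job statistics. It assumes default application credentials and uses a public sample table; the numbers printed will vary.

```python
from google.cloud import bigquery

client = bigquery.Client()

job = client.query(
    "SELECT corpus, COUNT(*) AS n "
    "FROM `bigquery-public-data.samples.shakespeare` "
    "GROUP BY corpus"
)

print(job.job_id, job.state)      # typically 'PENDING' or 'RUNNING' at this point
rows = job.result(timeout=60)     # poll until DONE; raises if it takes too long

print(job.state)                  # 'DONE'
print(f"{job.total_bytes_processed} bytes processed")
print(f"{job.slot_millis} slot-milliseconds consumed")
print(f"{(job.ended - job.started).total_seconds():.1f}s of execution time")
```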
### Query Job Configuration

Configure query behavior, performance, and output options.

```python { .api }
class QueryJobConfig:
    def __init__(self, **kwargs): ...

    @property
    def allow_large_results(self) -> bool:
        """Allow large results that exceed response size limits."""

    @allow_large_results.setter
    def allow_large_results(self, value: bool): ...

    @property
    def create_disposition(self) -> str:
        """Action when destination table doesn't exist."""

    @create_disposition.setter
    def create_disposition(self, value: str): ...

    @property
    def default_dataset(self) -> DatasetReference:
        """Default dataset for unqualified table names."""

    @default_dataset.setter
    def default_dataset(self, value: DatasetReference): ...

    @property
    def destination(self) -> TableReference:
        """Table to store query results."""

    @destination.setter
    def destination(self, value: TableReference): ...

    @property
    def dry_run(self) -> bool:
        """Validate query without executing it."""

    @dry_run.setter
    def dry_run(self, value: bool): ...

    @property
    def maximum_bytes_billed(self) -> int:
        """Maximum bytes that can be billed."""

    @maximum_bytes_billed.setter
    def maximum_bytes_billed(self, value: int): ...

    @property
    def priority(self) -> str:
        """Query priority ('INTERACTIVE' or 'BATCH')."""

    @priority.setter
    def priority(self, value: str): ...

    @property
    def query_parameters(self) -> List[Union[ScalarQueryParameter, ArrayQueryParameter, StructQueryParameter]]:
        """Parameters for parameterized queries."""

    @query_parameters.setter
    def query_parameters(self, value: List): ...

    @property
    def use_legacy_sql(self) -> bool:
        """Use legacy SQL syntax instead of standard SQL."""

    @use_legacy_sql.setter
    def use_legacy_sql(self, value: bool): ...

    @property
    def use_query_cache(self) -> bool:
        """Enable query result caching."""

    @use_query_cache.setter
    def use_query_cache(self, value: bool): ...

    @property
    def write_disposition(self) -> str:
        """Action when destination table exists."""

    @write_disposition.setter
    def write_disposition(self, value: str): ...

    @property
    def labels(self) -> Dict[str, str]:
        """Labels for the query job."""

    @labels.setter
    def labels(self, value: Dict[str, str]): ...

    @property
    def job_timeout(self) -> datetime.timedelta:
        """Maximum time to wait for job completion."""

    @job_timeout.setter
    def job_timeout(self, value: datetime.timedelta): ...
```
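One option worth calling out is `maximum_bytes_billed`, which acts as a cost guard: if the query would bill more than the limit, the service rejects the job instead of running it. A sketch of that behavior with an arbitrary 10 MiB budget; the broad `GoogleAPICallError` base class is caught here rather than a specific subclass, since the exact error type surfaced can vary.

```python
from google.cloud import bigquery
from google.api_core.exceptions import GoogleAPICallError

client = bigquery.Client()

job_config = bigquery.QueryJobConfig(
    maximum_bytes_billed=10 * 1024 * 1024,  # refuse to bill more than 10 MiB
    use_query_cache=False,                  # force a real scan so the guard applies
)

try:
    job = client.query(
        "SELECT * FROM `bigquery-public-data.samples.wikipedia`",
        job_config=job_config,
    )
    job.result()
except GoogleAPICallError as exc:
    print(f"Query rejected by byte budget: {exc}")
```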
### Result Processing

Process query results with support for pagination, type conversion, and data science integrations.

```python { .api }
class RowIterator:
    def __init__(self, client: Client, query_job: QueryJob): ...

    @property
    def total_rows(self) -> int:
        """Total number of rows in the result set."""

    @property
    def schema(self) -> List[SchemaField]:
        """Schema of the result set."""

    def to_dataframe(
        self,
        create_bqstorage_client: bool = True,
        dtypes: Dict[str, str] = None,
        progress_bar_type: str = None,
        **kwargs
    ) -> pandas.DataFrame:
        """
        Convert results to pandas DataFrame.

        Args:
            create_bqstorage_client: Use BigQuery Storage API.
            dtypes: Pandas data types for columns.
            progress_bar_type: Progress bar type.

        Returns:
            pandas.DataFrame: Results as DataFrame.
        """

    def to_arrow(
        self,
        create_bqstorage_client: bool = True,
        progress_bar_type: str = None,
    ) -> pyarrow.Table:
        """
        Convert results to PyArrow Table.

        Args:
            create_bqstorage_client: Use BigQuery Storage API.
            progress_bar_type: Progress bar type.

        Returns:
            pyarrow.Table: Results as PyArrow Table.
        """

class Row:
    def __init__(self, values: List[Any], field_to_index: Dict[str, int]): ...

    def values(self) -> List[Any]:
        """Return row values as a list."""

    def keys(self) -> List[str]:
        """Return column names."""

    def items(self) -> List[Tuple[str, Any]]:
        """Return (column_name, value) pairs."""

    def get(self, key: str, default: Any = None) -> Any:
        """Get value by column name with optional default."""
```
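`Row` objects behave like lightweight read-only mappings, so values can be read by position, by column name, or as attributes. A short sketch using a literal one-row query:

```python
from google.cloud import bigquery

client = bigquery.Client()
rows = client.query("SELECT 'Alice' AS name, 30 AS age").result()

print(rows.total_rows)                        # 1
print([field.name for field in rows.schema])  # ['name', 'age']

for row in rows:
    print(row[0], row["age"], row.name)       # positional, key, and attribute access
    print(dict(row.items()))                  # {'name': 'Alice', 'age': 30}
    print(row.get("missing", "n/a"))          # default for absent columns
```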
### DML Statistics

Access detailed statistics for Data Manipulation Language (INSERT, UPDATE, DELETE, MERGE) operations.

```python { .api }
class DmlStats:
    def __init__(self, **kwargs): ...

    @property
    def inserted_row_count(self) -> int:
        """Number of rows inserted."""

    @property
    def deleted_row_count(self) -> int:
        """Number of rows deleted."""

    @property
    def updated_row_count(self) -> int:
        """Number of rows updated."""
```
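A single MERGE statement can insert, update, and delete in one job, in which case several of these counters are populated at once. A sketch against placeholder tables (`my_project.my_dataset.target` and `my_project.my_dataset.source` and their columns are hypothetical):

```python
from google.cloud import bigquery

client = bigquery.Client()

merge_sql = """
MERGE `my_project.my_dataset.target` AS t
USING `my_project.my_dataset.source` AS s
ON t.id = s.id
WHEN MATCHED AND s.is_deleted THEN DELETE
WHEN MATCHED THEN UPDATE SET t.value = s.value
WHEN NOT MATCHED THEN INSERT (id, value) VALUES (s.id, s.value)
"""

job = client.query(merge_sql)
job.result()

stats = job.dml_stats
if stats:
    print(f"inserted={stats.inserted_row_count} "
          f"updated={stats.updated_row_count} "
          f"deleted={stats.deleted_row_count}")
```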
### Script Execution

Execute multi-statement scripts with detailed execution statistics and error handling.

```python { .api }
class ScriptOptions:
    def __init__(self, **kwargs): ...

    @property
    def statement_timeout_ms(self) -> int:
        """Timeout for individual statements in milliseconds."""

    @statement_timeout_ms.setter
    def statement_timeout_ms(self, value: int): ...

    @property
    def statement_byte_budget(self) -> int:
        """Maximum bytes processed per statement."""

    @statement_byte_budget.setter
    def statement_byte_budget(self, value: int): ...

class ScriptStatistics:
    def __init__(self, **kwargs): ...

    @property
    def evaluation_kind(self) -> str:
        """Type of script evaluation."""

    @property
    def stack_frames(self) -> List[ScriptStackFrame]:
        """Execution stack frames."""

class ScriptStackFrame:
    def __init__(self, **kwargs): ...

    @property
    def start_line(self) -> int:
        """Starting line number."""

    @property
    def start_column(self) -> int:
        """Starting column number."""

    @property
    def end_line(self) -> int:
        """Ending line number."""

    @property
    def end_column(self) -> int:
        """Ending column number."""

    @property
    def procedure_id(self) -> str:
        """Procedure identifier."""

    @property
    def text(self) -> str:
        """Stack frame text."""
```
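Scripts are submitted like any other query: the parent job returns the rows of the final statement, and each statement also runs as a child job that can be listed for per-statement details. A sketch, noting that attribute availability on child jobs (such as `statement_type`) may vary slightly by library version:

```python
from google.cloud import bigquery

client = bigquery.Client()

script = """
DECLARE target_state STRING DEFAULT 'CA';

CREATE TEMP TABLE top_names AS
SELECT name, SUM(number) AS total
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = target_state
GROUP BY name;

SELECT * FROM top_names ORDER BY total DESC LIMIT 5;
"""

parent_job = client.query(script)
for row in parent_job.result():           # rows produced by the final SELECT
    print(row.name, row.total)

# Each statement in the script runs as a child job of the parent.
for child in client.list_jobs(parent_job=parent_job.job_id):
    print(child.job_id, child.statement_type)
```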
## Usage Examples

### Basic Query Execution

```python
from google.cloud import bigquery

client = bigquery.Client()

# Simple query
query = """
SELECT name, COUNT(*) as count
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = 'CA'
GROUP BY name
ORDER BY count DESC
LIMIT 10
"""

query_job = client.query(query)
results = query_job.result()

# Process results
for row in results:
    print(f"{row.name}: {row.count}")
```
### Parameterized Queries

```python
from google.cloud.bigquery import ScalarQueryParameter

# Query with parameters
query = """
SELECT name, COUNT(*) as count
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = @state AND year >= @min_year
GROUP BY name
ORDER BY count DESC
LIMIT @limit
"""

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        ScalarQueryParameter("state", "STRING", "TX"),
        ScalarQueryParameter("min_year", "INT64", 2000),
        ScalarQueryParameter("limit", "INT64", 5),
    ]
)

query_job = client.query(query, job_config=job_config)
results = query_job.result()

for row in results:
    print(f"{row.name}: {row.count}")
```
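Parameters are not limited to scalars; `ArrayQueryParameter` (and `StructQueryParameter`) follow the same pattern. A sketch reusing the client from the examples above and passing a list of states:

```python
from google.cloud.bigquery import ArrayQueryParameter

query = """
SELECT name, SUM(number) AS total
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state IN UNNEST(@states)
GROUP BY name
ORDER BY total DESC
LIMIT 10
"""

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        ArrayQueryParameter("states", "STRING", ["CA", "TX", "NY"]),
    ]
)

for row in client.query(query, job_config=job_config).result():
    print(f"{row.name}: {row.total}")
```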
### Query with Configuration

```python
# Advanced query configuration
job_config = bigquery.QueryJobConfig(
    destination=f"{client.project}.my_dataset.my_table",
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    priority=bigquery.QueryPriority.BATCH,
    maximum_bytes_billed=1000000,  # 1 MB limit
    use_query_cache=True,
    labels={"team": "data-science", "env": "prod"}
)

query_job = client.query(query, job_config=job_config)
query_job.result()  # Wait for completion

print(f"Query processed {query_job.total_bytes_processed} bytes")
print(f"Query billed {query_job.total_bytes_billed} bytes")
```
### Working with Large Results

```python
# Query with pagination
query = "SELECT * FROM `bigquery-public-data.samples.wikipedia`"

query_job = client.query(query)

# Process in batches
for page in query_job.result().pages:
    for row in page:
        # Process each row
        print(row.title)

# Convert to pandas DataFrame
df = query_job.to_dataframe()
print(df.head())

# Convert to PyArrow for efficient processing
arrow_table = query_job.to_arrow()
print(arrow_table.schema)
```
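The `result()` options documented above can also pull just a slice of a large result set rather than all of it. A small sketch, continuing with the `query_job` from the previous block; the offsets and page size are arbitrary:

```python
# Fetch rows 1000-1499 only, 100 rows per underlying API page.
subset = query_job.result(start_index=1000, max_results=500, page_size=100)

for row in subset:
    print(row.title)
```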
### DML Operations

```python
# INSERT query
insert_query = """
INSERT INTO `my_project.my_dataset.my_table` (name, age, city)
VALUES
    ('Alice', 30, 'New York'),
    ('Bob', 25, 'San Francisco')
"""

query_job = client.query(insert_query)
query_job.result()

# Check DML statistics
if query_job.dml_stats:
    print(f"Inserted {query_job.dml_stats.inserted_row_count} rows")

# UPDATE query
update_query = """
UPDATE `my_project.my_dataset.my_table`
SET age = age + 1
WHERE city = 'New York'
"""

query_job = client.query(update_query)
query_job.result()

if query_job.dml_stats:
    print(f"Updated {query_job.dml_stats.updated_row_count} rows")
```
### Dry Run and Cost Estimation

```python
# Dry run to estimate the cost without running the query
job_config = bigquery.QueryJobConfig(dry_run=True)

query_job = client.query(query, job_config=job_config)

print(f"This query will process {query_job.total_bytes_processed} bytes")
# Rough estimate assuming on-demand pricing of about $5 per TiB scanned
print(f"Estimated cost: ${query_job.total_bytes_processed / (1024**4) * 5:.2f}")
```