# Query Operations

SQL query execution with parameters, job configuration, and result processing. BigQuery supports both simple ad hoc queries and complex analytical workloads, with features like result pagination, streaming reads, and integration with data science libraries such as pandas and PyArrow.

## Capabilities
### Query Job Execution

Execute SQL queries asynchronously with comprehensive job monitoring and configuration options.

```python { .api }
class QueryJob:
    def __init__(self, job_id: str, query: str, client: Client): ...

    @property
    def state(self) -> str:
        """Current state of the job ('PENDING', 'RUNNING', 'DONE')."""

    @property
    def query(self) -> str:
        """SQL query being executed."""

    @property
    def job_id(self) -> str:
        """Unique identifier for this job."""

    @property
    def location(self) -> str:
        """Location where the job is running."""

    @property
    def created(self) -> datetime.datetime:
        """Timestamp when the job was created."""

    @property
    def started(self) -> datetime.datetime:
        """Timestamp when the job started running."""

    @property
    def ended(self) -> datetime.datetime:
        """Timestamp when the job completed."""

    @property
    def total_bytes_processed(self) -> int:
        """Total bytes processed by the query."""

    @property
    def total_bytes_billed(self) -> int:
        """Total bytes billed for the query."""

    @property
    def slot_millis(self) -> int:
        """Slot milliseconds consumed by the query."""

    @property
    def num_dml_affected_rows(self) -> int:
        """Number of rows affected by a DML statement."""

    def result(
        self,
        page_size: int = None,
        max_results: int = None,
        retry: google.api_core.retry.Retry = DEFAULT_RETRY,
        timeout: float = None,
        start_index: int = None,
    ) -> google.cloud.bigquery.table.RowIterator:
        """
        Wait for query completion and return results.

        Args:
            page_size: Number of rows per page.
            max_results: Maximum total rows to return.
            retry: Retry configuration for polling.
            timeout: Timeout in seconds for polling.
            start_index: Zero-based index of first row to return.

        Returns:
            RowIterator: Iterator over query results.
        """

    def to_dataframe(
        self,
        create_bqstorage_client: bool = True,
        dtypes: Dict[str, str] = None,
        progress_bar_type: str = None,
        **kwargs
    ) -> pandas.DataFrame:
        """
        Return query results as a pandas DataFrame.

        Args:
            create_bqstorage_client: Use BigQuery Storage API for faster downloads.
            dtypes: Pandas data types for specific columns.
            progress_bar_type: Type of progress bar ('tqdm', None).

        Returns:
            pandas.DataFrame: Query results as DataFrame.
        """

    def to_arrow(
        self,
        create_bqstorage_client: bool = True,
        progress_bar_type: str = None,
    ) -> pyarrow.Table:
        """
        Return query results as a PyArrow Table.

        Args:
            create_bqstorage_client: Use BigQuery Storage API for faster downloads.
            progress_bar_type: Type of progress bar ('tqdm', None).

        Returns:
            pyarrow.Table: Query results as PyArrow Table.
        """

    def cancel(
        self,
        retry: google.api_core.retry.Retry = DEFAULT_RETRY,
        timeout: float = None,
    ) -> bool:
        """
        Cancel the query job.

        Args:
            retry: Retry configuration.
            timeout: Timeout in seconds.

        Returns:
            bool: True if cancellation was successful.
        """
```
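For orientation, here is a minimal sketch of driving a query job end to end: submit, block on `result()`, then read the job statistics. It assumes default application credentials and uses a public sample table; the numbers printed will vary.

```python
from google.cloud import bigquery

client = bigquery.Client()

job = client.query(
    "SELECT corpus, COUNT(*) AS n "
    "FROM `bigquery-public-data.samples.shakespeare` "
    "GROUP BY corpus"
)

print(job.job_id, job.state)      # typically 'PENDING' or 'RUNNING' at this point
rows = job.result(timeout=60)     # poll until DONE; raises if it takes too long

print(job.state)                  # 'DONE'
print(f"{job.total_bytes_processed} bytes processed")
print(f"{job.slot_millis} slot-milliseconds consumed")
print(f"{(job.ended - job.started).total_seconds():.1f}s of execution time")
```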
### Query Job Configuration

Configure query behavior, performance, and output options.

```python { .api }
class QueryJobConfig:
    def __init__(self, **kwargs): ...

    @property
    def allow_large_results(self) -> bool:
        """Allow large results that exceed response size limits."""

    @allow_large_results.setter
    def allow_large_results(self, value: bool): ...

    @property
    def create_disposition(self) -> str:
        """Action when destination table doesn't exist."""

    @create_disposition.setter
    def create_disposition(self, value: str): ...

    @property
    def default_dataset(self) -> DatasetReference:
        """Default dataset for unqualified table names."""

    @default_dataset.setter
    def default_dataset(self, value: DatasetReference): ...

    @property
    def destination(self) -> TableReference:
        """Table to store query results."""

    @destination.setter
    def destination(self, value: TableReference): ...

    @property
    def dry_run(self) -> bool:
        """Validate query without executing it."""

    @dry_run.setter
    def dry_run(self, value: bool): ...

    @property
    def maximum_bytes_billed(self) -> int:
        """Maximum bytes that can be billed."""

    @maximum_bytes_billed.setter
    def maximum_bytes_billed(self, value: int): ...

    @property
    def priority(self) -> str:
        """Query priority ('INTERACTIVE' or 'BATCH')."""

    @priority.setter
    def priority(self, value: str): ...

    @property
    def query_parameters(self) -> List[Union[ScalarQueryParameter, ArrayQueryParameter, StructQueryParameter]]:
        """Parameters for parameterized queries."""

    @query_parameters.setter
    def query_parameters(self, value: List): ...

    @property
    def use_legacy_sql(self) -> bool:
        """Use legacy SQL syntax instead of standard SQL."""

    @use_legacy_sql.setter
    def use_legacy_sql(self, value: bool): ...

    @property
    def use_query_cache(self) -> bool:
        """Enable query result caching."""

    @use_query_cache.setter
    def use_query_cache(self, value: bool): ...

    @property
    def write_disposition(self) -> str:
        """Action when destination table exists."""

    @write_disposition.setter
    def write_disposition(self, value: str): ...

    @property
    def labels(self) -> Dict[str, str]:
        """Labels for the query job."""

    @labels.setter
    def labels(self, value: Dict[str, str]): ...

    @property
    def job_timeout(self) -> datetime.timedelta:
        """Maximum time to wait for job completion."""

    @job_timeout.setter
    def job_timeout(self, value: datetime.timedelta): ...
```
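One option worth calling out is `maximum_bytes_billed`, which acts as a cost guard: if the query would bill more than the limit, the service rejects the job instead of running it. A sketch of that behavior with an arbitrary 10 MiB budget; the broad `GoogleAPICallError` base class is caught here rather than a specific subclass, since the exact error type surfaced can vary.

```python
from google.cloud import bigquery
from google.api_core.exceptions import GoogleAPICallError

client = bigquery.Client()

job_config = bigquery.QueryJobConfig(
    maximum_bytes_billed=10 * 1024 * 1024,  # refuse to bill more than 10 MiB
    use_query_cache=False,                  # force a real scan so the guard applies
)

try:
    job = client.query(
        "SELECT * FROM `bigquery-public-data.samples.wikipedia`",
        job_config=job_config,
    )
    job.result()
except GoogleAPICallError as exc:
    print(f"Query rejected by byte budget: {exc}")
```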
### Result Processing

Process query results with support for pagination, type conversion, and data science integrations.

```python { .api }
class RowIterator:
    def __init__(self, client: Client, query_job: QueryJob): ...

    @property
    def total_rows(self) -> int:
        """Total number of rows in the result set."""

    @property
    def schema(self) -> List[SchemaField]:
        """Schema of the result set."""

    def to_dataframe(
        self,
        create_bqstorage_client: bool = True,
        dtypes: Dict[str, str] = None,
        progress_bar_type: str = None,
        **kwargs
    ) -> pandas.DataFrame:
        """
        Convert results to pandas DataFrame.

        Args:
            create_bqstorage_client: Use BigQuery Storage API.
            dtypes: Pandas data types for columns.
            progress_bar_type: Progress bar type.

        Returns:
            pandas.DataFrame: Results as DataFrame.
        """

    def to_arrow(
        self,
        create_bqstorage_client: bool = True,
        progress_bar_type: str = None,
    ) -> pyarrow.Table:
        """
        Convert results to PyArrow Table.

        Args:
            create_bqstorage_client: Use BigQuery Storage API.
            progress_bar_type: Progress bar type.

        Returns:
            pyarrow.Table: Results as PyArrow Table.
        """

class Row:
    def __init__(self, values: List[Any], field_to_index: Dict[str, int]): ...

    def values(self) -> List[Any]:
        """Return row values as a list."""

    def keys(self) -> List[str]:
        """Return column names."""

    def items(self) -> List[Tuple[str, Any]]:
        """Return (column_name, value) pairs."""

    def get(self, key: str, default: Any = None) -> Any:
        """Get value by column name with optional default."""
```
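`Row` objects behave like lightweight read-only mappings, so values can be read by position, by column name, or as attributes. A short sketch using a literal one-row query:

```python
from google.cloud import bigquery

client = bigquery.Client()
rows = client.query("SELECT 'Alice' AS name, 30 AS age").result()

print(rows.total_rows)                        # 1
print([field.name for field in rows.schema])  # ['name', 'age']

for row in rows:
    print(row[0], row["age"], row.name)       # positional, key, and attribute access
    print(dict(row.items()))                  # {'name': 'Alice', 'age': 30}
    print(row.get("missing", "n/a"))          # default for absent columns
```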
### DML Statistics

Access detailed statistics for Data Manipulation Language (INSERT, UPDATE, DELETE, MERGE) operations.

```python { .api }
class DmlStats:
    def __init__(self, **kwargs): ...

    @property
    def inserted_row_count(self) -> int:
        """Number of rows inserted."""

    @property
    def deleted_row_count(self) -> int:
        """Number of rows deleted."""

    @property
    def updated_row_count(self) -> int:
        """Number of rows updated."""
```
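A single MERGE statement can insert, update, and delete in one job, in which case several of these counters are populated at once. A sketch against placeholder tables (`my_project.my_dataset.target` and `my_project.my_dataset.source` and their columns are hypothetical):

```python
from google.cloud import bigquery

client = bigquery.Client()

merge_sql = """
MERGE `my_project.my_dataset.target` AS t
USING `my_project.my_dataset.source` AS s
ON t.id = s.id
WHEN MATCHED AND s.is_deleted THEN DELETE
WHEN MATCHED THEN UPDATE SET t.value = s.value
WHEN NOT MATCHED THEN INSERT (id, value) VALUES (s.id, s.value)
"""

job = client.query(merge_sql)
job.result()

stats = job.dml_stats
if stats:
    print(f"inserted={stats.inserted_row_count} "
          f"updated={stats.updated_row_count} "
          f"deleted={stats.deleted_row_count}")
```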
### Script Execution

Execute multi-statement scripts with detailed execution statistics and error handling.

```python { .api }
class ScriptOptions:
    def __init__(self, **kwargs): ...

    @property
    def statement_timeout_ms(self) -> int:
        """Timeout for individual statements in milliseconds."""

    @statement_timeout_ms.setter
    def statement_timeout_ms(self, value: int): ...

    @property
    def statement_byte_budget(self) -> int:
        """Maximum bytes processed per statement."""

    @statement_byte_budget.setter
    def statement_byte_budget(self, value: int): ...

class ScriptStatistics:
    def __init__(self, **kwargs): ...

    @property
    def evaluation_kind(self) -> str:
        """Type of script evaluation."""

    @property
    def stack_frames(self) -> List[ScriptStackFrame]:
        """Execution stack frames."""

class ScriptStackFrame:
    def __init__(self, **kwargs): ...

    @property
    def start_line(self) -> int:
        """Starting line number."""

    @property
    def start_column(self) -> int:
        """Starting column number."""

    @property
    def end_line(self) -> int:
        """Ending line number."""

    @property
    def end_column(self) -> int:
        """Ending column number."""

    @property
    def procedure_id(self) -> str:
        """Procedure identifier."""

    @property
    def text(self) -> str:
        """Stack frame text."""
```
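Scripts are submitted like any other query: the parent job returns the rows of the final statement, and each statement also runs as a child job that can be listed for per-statement details. A sketch, noting that attribute availability on child jobs (such as `statement_type`) may vary slightly by library version:

```python
from google.cloud import bigquery

client = bigquery.Client()

script = """
DECLARE target_state STRING DEFAULT 'CA';

CREATE TEMP TABLE top_names AS
SELECT name, SUM(number) AS total
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = target_state
GROUP BY name;

SELECT * FROM top_names ORDER BY total DESC LIMIT 5;
"""

parent_job = client.query(script)
for row in parent_job.result():           # rows produced by the final SELECT
    print(row.name, row.total)

# Each statement in the script runs as a child job of the parent.
for child in client.list_jobs(parent_job=parent_job.job_id):
    print(child.job_id, child.statement_type)
```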
## Usage Examples

### Basic Query Execution

```python
from google.cloud import bigquery

client = bigquery.Client()

# Simple query
query = """
SELECT name, COUNT(*) as count
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = 'CA'
GROUP BY name
ORDER BY count DESC
LIMIT 10
"""

query_job = client.query(query)
results = query_job.result()

# Process results
for row in results:
    print(f"{row.name}: {row.count}")
```
### Parameterized Queries

```python
from google.cloud.bigquery import ScalarQueryParameter

# Query with parameters
query = """
SELECT name, COUNT(*) as count
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = @state AND year >= @min_year
GROUP BY name
ORDER BY count DESC
LIMIT @limit
"""

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        ScalarQueryParameter("state", "STRING", "TX"),
        ScalarQueryParameter("min_year", "INT64", 2000),
        ScalarQueryParameter("limit", "INT64", 5),
    ]
)

query_job = client.query(query, job_config=job_config)
results = query_job.result()

for row in results:
    print(f"{row.name}: {row.count}")
```
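Parameters are not limited to scalars; `ArrayQueryParameter` (and `StructQueryParameter`) follow the same pattern. A sketch reusing the client from the examples above and passing a list of states:

```python
from google.cloud.bigquery import ArrayQueryParameter

query = """
SELECT name, SUM(number) AS total
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state IN UNNEST(@states)
GROUP BY name
ORDER BY total DESC
LIMIT 10
"""

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        ArrayQueryParameter("states", "STRING", ["CA", "TX", "NY"]),
    ]
)

for row in client.query(query, job_config=job_config).result():
    print(f"{row.name}: {row.total}")
```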
### Query with Configuration

```python
# Advanced query configuration
job_config = bigquery.QueryJobConfig(
    destination=f"{client.project}.my_dataset.my_table",
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    priority=bigquery.QueryPriority.BATCH,
    maximum_bytes_billed=1000000,  # 1 MB limit
    use_query_cache=True,
    labels={"team": "data-science", "env": "prod"}
)

query_job = client.query(query, job_config=job_config)
query_job.result()  # Wait for completion

print(f"Query processed {query_job.total_bytes_processed} bytes")
print(f"Query billed {query_job.total_bytes_billed} bytes")
```
### Working with Large Results

```python
# Query with pagination
query = "SELECT * FROM `bigquery-public-data.samples.wikipedia`"

query_job = client.query(query)

# Process in batches
for page in query_job.result().pages:
    for row in page:
        # Process each row
        print(row.title)

# Convert to pandas DataFrame
df = query_job.to_dataframe()
print(df.head())

# Convert to PyArrow for efficient processing
arrow_table = query_job.to_arrow()
print(arrow_table.schema)
```
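The `result()` options documented above can also pull just a slice of a large result set rather than all of it. A small sketch, continuing with the `query_job` from the previous block; the offsets and page size are arbitrary:

```python
# Fetch rows 1000-1499 only, 100 rows per underlying API page.
subset = query_job.result(start_index=1000, max_results=500, page_size=100)

for row in subset:
    print(row.title)
```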
### DML Operations

```python
# INSERT query
insert_query = """
INSERT INTO `my_project.my_dataset.my_table` (name, age, city)
VALUES
    ('Alice', 30, 'New York'),
    ('Bob', 25, 'San Francisco')
"""

query_job = client.query(insert_query)
query_job.result()

# Check DML statistics
if query_job.dml_stats:
    print(f"Inserted {query_job.dml_stats.inserted_row_count} rows")

# UPDATE query
update_query = """
UPDATE `my_project.my_dataset.my_table`
SET age = age + 1
WHERE city = 'New York'
"""

query_job = client.query(update_query)
query_job.result()

if query_job.dml_stats:
    print(f"Updated {query_job.dml_stats.updated_row_count} rows")
```
### Dry Run and Cost Estimation

```python
# Dry run to estimate the cost without running the query
job_config = bigquery.QueryJobConfig(dry_run=True)

query_job = client.query(query, job_config=job_config)

print(f"This query will process {query_job.total_bytes_processed} bytes")
# Rough estimate assuming on-demand pricing of about $5 per TiB scanned
print(f"Estimated cost: ${query_job.total_bytes_processed / (1024**4) * 5:.2f}")
```