# Query Operations

SQL query execution with parameters, job configuration, and result processing. BigQuery supports both simple ad-hoc queries and complex analytical workloads with features like pagination, streaming, and integration with data science libraries.

## Capabilities

### Query Job Execution

Execute SQL queries asynchronously with comprehensive job monitoring and configuration options.

```python { .api }
class QueryJob:
    def __init__(self, job_id: str, query: str, client: Client): ...

    @property
    def state(self) -> str:
        """Current state of the job ('PENDING', 'RUNNING', 'DONE')."""

    @property
    def query(self) -> str:
        """SQL query being executed."""

    @property
    def job_id(self) -> str:
        """Unique identifier for this job."""

    @property
    def location(self) -> str:
        """Location where the job is running."""

    @property
    def created(self) -> datetime.datetime:
        """Timestamp when the job was created."""

    @property
    def started(self) -> datetime.datetime:
        """Timestamp when the job started running."""

    @property
    def ended(self) -> datetime.datetime:
        """Timestamp when the job completed."""

    @property
    def total_bytes_processed(self) -> int:
        """Total bytes processed by the query."""

    @property
    def total_bytes_billed(self) -> int:
        """Total bytes billed for the query."""

    @property
    def slot_millis(self) -> int:
        """Slot milliseconds consumed by the query."""

    @property
    def num_dml_affected_rows(self) -> int:
        """Number of rows affected by a DML statement."""

    def result(
        self,
        page_size: int = None,
        max_results: int = None,
        retry: google.api_core.retry.Retry = DEFAULT_RETRY,
        timeout: float = None,
        start_index: int = None,
    ) -> google.cloud.bigquery.table.RowIterator:
        """
        Wait for query completion and return results.

        Args:
            page_size: Number of rows per page.
            max_results: Maximum total rows to return.
            retry: Retry configuration for polling.
            timeout: Timeout in seconds for polling.
            start_index: Zero-based index of first row to return.

        Returns:
            RowIterator: Iterator over query results.
        """

    def to_dataframe(
        self,
        create_bqstorage_client: bool = True,
        dtypes: Dict[str, str] = None,
        progress_bar_type: str = None,
        **kwargs
    ) -> pandas.DataFrame:
        """
        Return query results as a pandas DataFrame.

        Args:
            create_bqstorage_client: Use BigQuery Storage API for faster downloads.
            dtypes: Pandas data types for specific columns.
            progress_bar_type: Type of progress bar ('tqdm', None).

        Returns:
            pandas.DataFrame: Query results as DataFrame.
        """

    def to_arrow(
        self,
        create_bqstorage_client: bool = True,
        progress_bar_type: str = None,
    ) -> pyarrow.Table:
        """
        Return query results as a PyArrow Table.

        Args:
            create_bqstorage_client: Use BigQuery Storage API for faster downloads.
            progress_bar_type: Type of progress bar ('tqdm', None).

        Returns:
            pyarrow.Table: Query results as PyArrow Table.
        """

    def cancel(
        self,
        retry: google.api_core.retry.Retry = DEFAULT_RETRY,
        timeout: float = None,
    ) -> bool:
        """
        Cancel the query job.

        Args:
            retry: Retry configuration.
            timeout: Timeout in seconds.

        Returns:
            bool: True if cancellation was successful.
        """
```
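
A minimal sketch of job monitoring with these properties, assuming default credentials; the `SELECT 1` query is a placeholder only:

```python
from google.cloud import bigquery

client = bigquery.Client()
job = client.query("SELECT 1")  # placeholder query

# Job metadata is available as soon as the job is created.
print(job.job_id, job.location, job.state)

job.result()  # Block until the job reaches the DONE state.

print(f"Created: {job.created}, started: {job.started}, ended: {job.ended}")
print(f"Slot milliseconds consumed: {job.slot_millis}")

# A long-running job can be cancelled; cancel() returns True when the
# cancel request is accepted, though the job may take a moment to stop.
# job.cancel()
```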

### Query Job Configuration

Configure query behavior, performance, and output options.

```python { .api }
class QueryJobConfig:
    def __init__(self, **kwargs): ...

    @property
    def allow_large_results(self) -> bool:
        """Allow large results that exceed response size limits."""

    @allow_large_results.setter
    def allow_large_results(self, value: bool): ...

    @property
    def create_disposition(self) -> str:
        """Action when destination table doesn't exist."""

    @create_disposition.setter
    def create_disposition(self, value: str): ...

    @property
    def default_dataset(self) -> DatasetReference:
        """Default dataset for unqualified table names."""

    @default_dataset.setter
    def default_dataset(self, value: DatasetReference): ...

    @property
    def destination(self) -> TableReference:
        """Table to store query results."""

    @destination.setter
    def destination(self, value: TableReference): ...

    @property
    def dry_run(self) -> bool:
        """Validate query without executing it."""

    @dry_run.setter
    def dry_run(self, value: bool): ...

    @property
    def maximum_bytes_billed(self) -> int:
        """Maximum bytes that can be billed."""

    @maximum_bytes_billed.setter
    def maximum_bytes_billed(self, value: int): ...

    @property
    def priority(self) -> str:
        """Query priority ('INTERACTIVE' or 'BATCH')."""

    @priority.setter
    def priority(self, value: str): ...

    @property
    def query_parameters(self) -> List[Union[ScalarQueryParameter, ArrayQueryParameter, StructQueryParameter]]:
        """Parameters for parameterized queries."""

    @query_parameters.setter
    def query_parameters(self, value: List): ...

    @property
    def use_legacy_sql(self) -> bool:
        """Use legacy SQL syntax instead of standard SQL."""

    @use_legacy_sql.setter
    def use_legacy_sql(self, value: bool): ...

    @property
    def use_query_cache(self) -> bool:
        """Enable query result caching."""

    @use_query_cache.setter
    def use_query_cache(self, value: bool): ...

    @property
    def write_disposition(self) -> str:
        """Action when destination table exists."""

    @write_disposition.setter
    def write_disposition(self, value: str): ...

    @property
    def labels(self) -> Dict[str, str]:
        """Labels for the query job."""

    @labels.setter
    def labels(self, value: Dict[str, str]): ...

    @property
    def job_timeout(self) -> datetime.timedelta:
        """Maximum time to wait for job completion."""

    @job_timeout.setter
    def job_timeout(self, value: datetime.timedelta): ...
```

### Result Processing

Process query results with support for pagination, type conversion, and data science integrations.

```python { .api }
class RowIterator:
    def __init__(self, client: Client, query_job: QueryJob): ...

    @property
    def total_rows(self) -> int:
        """Total number of rows in the result set."""

    @property
    def schema(self) -> List[SchemaField]:
        """Schema of the result set."""

    def to_dataframe(
        self,
        create_bqstorage_client: bool = True,
        dtypes: Dict[str, str] = None,
        progress_bar_type: str = None,
        **kwargs
    ) -> pandas.DataFrame:
        """
        Convert results to pandas DataFrame.

        Args:
            create_bqstorage_client: Use BigQuery Storage API.
            dtypes: Pandas data types for columns.
            progress_bar_type: Progress bar type.

        Returns:
            pandas.DataFrame: Results as DataFrame.
        """

    def to_arrow(
        self,
        create_bqstorage_client: bool = True,
        progress_bar_type: str = None,
    ) -> pyarrow.Table:
        """
        Convert results to PyArrow Table.

        Args:
            create_bqstorage_client: Use BigQuery Storage API.
            progress_bar_type: Progress bar type.

        Returns:
            pyarrow.Table: Results as PyArrow Table.
        """

class Row:
    def __init__(self, values: List[Any], field_to_index: Dict[str, int]): ...

    def values(self) -> List[Any]:
        """Return row values as a list."""

    def keys(self) -> List[str]:
        """Return column names."""

    def items(self) -> List[Tuple[str, Any]]:
        """Return (column_name, value) pairs."""

    def get(self, key: str, default: Any = None) -> Any:
        """Get value by column name with optional default."""
```
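
`Row` supports both attribute access (`row.name`) and the mapping-style helpers above. A short sketch; the literal SELECT is illustrative only:

```python
rows = client.query("SELECT 'Ada' AS name, 1815 AS birth_year").result()

print(f"Total rows: {rows.total_rows}")
print([field.name for field in rows.schema])  # column names from the schema

for row in rows:
    print(list(row.keys()))            # column names
    print(list(row.values()))          # values in column order
    print(dict(row.items()))           # {'name': 'Ada', 'birth_year': 1815}
    print(row.get("nickname", "n/a"))  # default for a missing column
```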

### DML Statistics

Access detailed statistics for Data Manipulation Language (INSERT, UPDATE, DELETE) operations.

```python { .api }
class DmlStats:
    def __init__(self, **kwargs): ...

    @property
    def inserted_row_count(self) -> int:
        """Number of rows inserted."""

    @property
    def deleted_row_count(self) -> int:
        """Number of rows deleted."""

    @property
    def updated_row_count(self) -> int:
        """Number of rows updated."""
```

### Script Execution

Execute multi-statement scripts with detailed execution statistics and error handling.

```python { .api }
class ScriptOptions:
    def __init__(self, **kwargs): ...

    @property
    def statement_timeout_ms(self) -> int:
        """Timeout for individual statements in milliseconds."""

    @statement_timeout_ms.setter
    def statement_timeout_ms(self, value: int): ...

    @property
    def statement_byte_budget(self) -> int:
        """Maximum bytes processed per statement."""

    @statement_byte_budget.setter
    def statement_byte_budget(self, value: int): ...

class ScriptStatistics:
    def __init__(self, **kwargs): ...

    @property
    def evaluation_kind(self) -> str:
        """Type of script evaluation."""

    @property
    def stack_frames(self) -> List[ScriptStackFrame]:
        """Execution stack frames."""

class ScriptStackFrame:
    def __init__(self, **kwargs): ...

    @property
    def start_line(self) -> int:
        """Starting line number."""

    @property
    def start_column(self) -> int:
        """Starting column number."""

    @property
    def end_line(self) -> int:
        """Ending line number."""

    @property
    def end_column(self) -> int:
        """Ending column number."""

    @property
    def procedure_id(self) -> str:
        """Procedure identifier."""

    @property
    def text(self) -> str:
        """Stack frame text."""
```
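
A sketch of running a multi-statement script. It assumes `ScriptOptions` is attached via `QueryJobConfig.script_options` and that per-statement statistics are read from child jobs through `list_jobs(parent_job=...)` and each job's `script_statistics`:

```python
from google.cloud import bigquery

client = bigquery.Client()

# Bound per-statement work; these limits apply to each statement
# in the script individually.
job_config = bigquery.QueryJobConfig(
    script_options=bigquery.ScriptOptions(statement_timeout_ms=30_000),
)

script = """
DECLARE target_state STRING DEFAULT 'CA';
SELECT name, COUNT(*) AS count
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = target_state
GROUP BY name
ORDER BY count DESC
LIMIT 5;
"""

parent_job = client.query(script, job_config=job_config)
for row in parent_job.result():  # rows from the script's final SELECT
    print(f"{row.name}: {row.count}")

# Each statement runs as a child job linked to the parent; its
# script_statistics locate the statement within the script text.
for child in client.list_jobs(parent_job=parent_job.job_id):
    stats = child.script_statistics
    if stats and stats.stack_frames:
        frame = stats.stack_frames[0]
        print(child.job_id, frame.start_line, frame.end_line)
```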

## Usage Examples

### Basic Query Execution

```python
from google.cloud import bigquery

client = bigquery.Client()

# Simple query
query = """
    SELECT name, COUNT(*) as count
    FROM `bigquery-public-data.usa_names.usa_1910_2013`
    WHERE state = 'CA'
    GROUP BY name
    ORDER BY count DESC
    LIMIT 10
"""

query_job = client.query(query)
results = query_job.result()

# Process results
for row in results:
    print(f"{row.name}: {row.count}")
```

### Parameterized Queries

```python
from google.cloud.bigquery import ScalarQueryParameter

# Query with parameters
query = """
    SELECT name, COUNT(*) as count
    FROM `bigquery-public-data.usa_names.usa_1910_2013`
    WHERE state = @state AND year >= @min_year
    GROUP BY name
    ORDER BY count DESC
    LIMIT @limit
"""

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        ScalarQueryParameter("state", "STRING", "TX"),
        ScalarQueryParameter("min_year", "INT64", 2000),
        ScalarQueryParameter("limit", "INT64", 5),
    ]
)

query_job = client.query(query, job_config=job_config)
results = query_job.result()

for row in results:
    print(f"{row.name}: {row.count}")
```
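
Besides `ScalarQueryParameter`, the `query_parameters` list accepts `ArrayQueryParameter` and `StructQueryParameter`. A brief sketch with an array parameter, matched via `IN UNNEST`:

```python
array_query = """
    SELECT name, state, COUNT(*) as count
    FROM `bigquery-public-data.usa_names.usa_1910_2013`
    WHERE state IN UNNEST(@states)
    GROUP BY name, state
    ORDER BY count DESC
    LIMIT 5
"""

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        # A single placeholder bound to a list of values.
        bigquery.ArrayQueryParameter("states", "STRING", ["CA", "TX", "NY"]),
    ]
)

for row in client.query(array_query, job_config=job_config).result():
    print(f"{row.name} ({row.state}): {row.count}")
```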

### Query with Configuration

```python
# Advanced query configuration
job_config = bigquery.QueryJobConfig(
    destination=f"{client.project}.my_dataset.my_table",
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    priority=bigquery.QueryPriority.BATCH,
    maximum_bytes_billed=1000000,  # 1 MB limit
    use_query_cache=True,
    labels={"team": "data-science", "env": "prod"},
)

query_job = client.query(query, job_config=job_config)
query_job.result()  # Wait for completion

print(f"Query processed {query_job.total_bytes_processed} bytes")
print(f"Query billed {query_job.total_bytes_billed} bytes")
```

### Working with Large Results

```python
# Query with pagination
query = "SELECT * FROM `bigquery-public-data.samples.wikipedia`"

query_job = client.query(query)

# Process in batches
for page in query_job.result().pages:
    for row in page:
        # Process each row
        print(row.title)

# Convert to pandas DataFrame
df = query_job.to_dataframe()
print(df.head())

# Convert to PyArrow for efficient processing
arrow_table = query_job.to_arrow()
print(arrow_table.schema)
```

### DML Operations

```python
# INSERT query
insert_query = """
    INSERT INTO `my_project.my_dataset.my_table` (name, age, city)
    VALUES
        ('Alice', 30, 'New York'),
        ('Bob', 25, 'San Francisco')
"""

query_job = client.query(insert_query)
query_job.result()

# Check DML statistics
if query_job.dml_stats:
    print(f"Inserted {query_job.dml_stats.inserted_row_count} rows")

# UPDATE query
update_query = """
    UPDATE `my_project.my_dataset.my_table`
    SET age = age + 1
    WHERE city = 'New York'
"""

query_job = client.query(update_query)
query_job.result()

if query_job.dml_stats:
    print(f"Updated {query_job.dml_stats.updated_row_count} rows")
```

### Dry Run and Cost Estimation

```python
# Dry run to validate the query and estimate cost without running it
job_config = bigquery.QueryJobConfig(dry_run=True)

query_job = client.query(query, job_config=job_config)

print(f"This query will process {query_job.total_bytes_processed} bytes")
# Rough estimate assuming $5 per TiB on-demand pricing; actual rates
# vary by region and pricing model.
print(f"Estimated cost: ${query_job.total_bytes_processed / (1024**4) * 5:.2f}")
```