0
# Database API (DB-API 2.0)
1
2
Python Database API specification compliance for SQL database compatibility. This interface enables BigQuery to work with database tools, ORMs, and applications that expect standard Python database connectivity.
3
4
## Capabilities
5
6
### Connection Management
7
8
Establish and manage database connections to BigQuery with authentication and configuration.
9
10
```python { .api }
11
def connect(
12
client: bigquery.Client = None,
13
bqstorage_client: bigquery_storage.BigQueryReadClient = None,
14
prefer_bqstorage_client: bool = True,
15
**kwargs
16
) -> Connection:
17
"""
18
Create a DB-API connection to BigQuery.
19
20
Args:
21
client: BigQuery client instance. If None, creates default client.
22
bqstorage_client: BigQuery Storage client for faster data transfer.
23
prefer_bqstorage_client: Prefer Storage API when available.
24
**kwargs: Additional connection parameters.
25
26
Returns:
27
Connection: DB-API connection object.
28
"""
29
30
class Connection:
31
def __init__(self, client: bigquery.Client = None, **kwargs): ...
32
33
def close(self) -> None:
34
"""Close the connection."""
35
36
def commit(self) -> None:
37
"""Commit current transaction (no-op for BigQuery)."""
38
39
def rollback(self) -> None:
40
"""Rollback current transaction (no-op for BigQuery)."""
41
42
def cursor(self) -> Cursor:
43
"""Create a new cursor object."""
44
45
@property
46
def closed(self) -> bool:
47
"""True if connection is closed."""
48
```
49
50
### Cursor Operations
51
52
Execute SQL statements and fetch results using the standard DB-API cursor interface.
53
54
```python { .api }
55
class Cursor:
56
def __init__(self, connection: Connection): ...
57
58
def close(self) -> None:
59
"""Close the cursor."""
60
61
def execute(self, query: str, parameters: Any = None) -> None:
62
"""
63
Execute a SQL query.
64
65
Args:
66
query: SQL query string, may contain parameter placeholders.
67
parameters: Query parameters (dict or sequence).
68
"""
69
70
def executemany(self, query: str, parameters_list: List[Any]) -> None:
71
"""
72
Execute a SQL query multiple times with different parameters.
73
74
Args:
75
query: SQL query string with parameter placeholders.
76
parameters_list: List of parameter sets.
77
"""
78
79
def fetchone(self) -> Optional[Tuple[Any, ...]]:
80
"""
81
Fetch next row from query results.
82
83
Returns:
84
Tuple: Row data as tuple, or None if no more rows.
85
"""
86
87
def fetchmany(self, size: int = None) -> List[Tuple[Any, ...]]:
88
"""
89
Fetch multiple rows from query results.
90
91
Args:
92
size: Number of rows to fetch. If None, uses arraysize.
93
94
Returns:
95
List[Tuple]: List of row tuples.
96
"""
97
98
def fetchall(self) -> List[Tuple[Any, ...]]:
99
"""
100
Fetch all remaining rows from query results.
101
102
Returns:
103
List[Tuple]: List of all row tuples.
104
"""
105
106
def setinputsizes(self, sizes: List[int]) -> None:
107
"""Set input parameter sizes (no-op for BigQuery)."""
108
109
def setoutputsize(self, size: int, column: int = None) -> None:
110
"""Set output column buffer size (no-op for BigQuery)."""
111
112
@property
113
def description(self) -> List[Tuple[str, str, None, None, None, None, None]]:
114
"""Column descriptions for current result set."""
115
116
@property
117
def rowcount(self) -> int:
118
"""Number of rows affected by last operation."""
119
120
@property
121
def arraysize(self) -> int:
122
"""Default number of rows fetchmany() should return."""
123
124
@arraysize.setter
125
def arraysize(self, value: int): ...
126
```
127
128
### DB-API Constants
129
130
Standard DB-API module-level constants and metadata.
131
132
```python { .api }
133
# API compliance level
134
apilevel: str = "2.0"
135
136
# Thread safety level (2 = threads may share module and connections)
137
threadsafety: int = 2
138
139
# Parameter style (pyformat = %(name)s style)
140
paramstyle: str = "pyformat"
141
```
142
143
### Exception Hierarchy
144
145
Standard DB-API exception classes for error handling.
146
147
```python { .api }
148
class Warning(Exception):
149
"""Warning exception for non-fatal issues."""
150
151
class Error(Exception):
152
"""Base exception for all database-related errors."""
153
154
class InterfaceError(Error):
155
"""Exception for database interface errors."""
156
157
class DatabaseError(Error):
158
"""Exception for database-related errors."""
159
160
class DataError(DatabaseError):
161
"""Exception for data processing errors."""
162
163
class OperationalError(DatabaseError):
164
"""Exception for operational database errors."""
165
166
class IntegrityError(DatabaseError):
167
"""Exception for data integrity constraint violations."""
168
169
class InternalError(DatabaseError):
170
"""Exception for internal database errors."""
171
172
class ProgrammingError(DatabaseError):
173
"""Exception for programming errors in SQL or API usage."""
174
175
class NotSupportedError(DatabaseError):
176
"""Exception for unsupported database features."""
177
```
178
179
### Type Constructors and Constants
180
181
DB-API type constructors and type constants for data handling.
182
183
```python { .api }
184
def Binary(data: bytes) -> bytes:
185
"""Construct binary data object."""
186
187
def Date(year: int, month: int, day: int) -> datetime.date:
188
"""Construct date object."""
189
190
def DateFromTicks(ticks: float) -> datetime.date:
191
"""Construct date from timestamp."""
192
193
def Time(hour: int, minute: int, second: int) -> datetime.time:
194
"""Construct time object."""
195
196
def TimeFromTicks(ticks: float) -> datetime.time:
197
"""Construct time from timestamp."""
198
199
def Timestamp(year: int, month: int, day: int, hour: int, minute: int, second: int) -> datetime.datetime:
200
"""Construct timestamp object."""
201
202
def TimestampFromTicks(ticks: float) -> datetime.datetime:
203
"""Construct timestamp from timestamp."""
204
205
# Type constants for column type identification
206
BINARY: str = "BINARY"
207
DATETIME: str = "DATETIME"
208
NUMBER: str = "NUMBER"
209
ROWID: str = "ROWID"
210
STRING: str = "STRING"
211
```
212
213
## Usage Examples
214
215
### Basic Connection and Query
216
217
```python
218
from google.cloud.bigquery import dbapi
219
220
# Create connection using default credentials
221
connection = dbapi.connect()
222
223
# Create cursor
224
cursor = connection.cursor()
225
226
# Execute simple query
227
cursor.execute("""
228
SELECT name, age, city
229
FROM `bigquery-public-data.usa_names.usa_1910_2013`
230
WHERE state = 'CA'
231
LIMIT 10
232
""")
233
234
# Fetch results
235
rows = cursor.fetchall()
236
for row in rows:
237
print(f"Name: {row[0]}, Age: {row[1]}, City: {row[2]}")
238
239
# Clean up
240
cursor.close()
241
connection.close()
242
```
243
244
### Parameterized Queries
245
246
```python
247
# Query with parameters using pyformat style
248
cursor.execute("""
249
SELECT product_id, product_name, price
250
FROM `my_project.my_dataset.products`
251
WHERE category = %(category)s
252
AND price BETWEEN %(min_price)s AND %(max_price)s
253
ORDER BY price DESC
254
LIMIT %(limit)s
255
""", {
256
'category': 'Electronics',
257
'min_price': 100.00,
258
'max_price': 1000.00,
259
'limit': 20
260
})
261
262
# Process results one by one
263
while True:
264
row = cursor.fetchone()
265
if row is None:
266
break
267
print(f"Product: {row[1]} - ${row[2]}")
268
```
269
270
### Batch Operations
271
272
```python
273
# Insert multiple rows using executemany
274
insert_query = """
275
INSERT INTO `my_project.my_dataset.users` (name, email, age)
276
VALUES (%(name)s, %(email)s, %(age)s)
277
"""
278
279
users_data = [
280
{'name': 'Alice', 'email': 'alice@example.com', 'age': 30},
281
{'name': 'Bob', 'email': 'bob@example.com', 'age': 25},
282
{'name': 'Charlie', 'email': 'charlie@example.com', 'age': 35},
283
]
284
285
cursor.executemany(insert_query, users_data)
286
print(f"Inserted {cursor.rowcount} rows")
287
```
288
289
### Working with Result Metadata
290
291
```python
292
# Execute query and examine result metadata
293
cursor.execute("""
294
SELECT user_id, name, email, created_at, last_login
295
FROM `my_project.my_dataset.users`
296
LIMIT 5
297
""")
298
299
# Print column information
300
print("Column descriptions:")
301
for i, desc in enumerate(cursor.description):
302
column_name = desc[0]
303
column_type = desc[1]
304
print(f" {i}: {column_name} ({column_type})")
305
306
# Fetch data with column awareness
307
results = cursor.fetchall()
308
for row in results:
309
print(f"User ID: {row[0]}, Name: {row[1]}, Email: {row[2]}")
310
```
311
312
### Error Handling
313
314
```python
315
from google.cloud.bigquery.dbapi import (
316
Error, DatabaseError, ProgrammingError, OperationalError
317
)
318
319
try:
320
cursor.execute("""
321
SELECT COUNT(*) FROM `nonexistent.dataset.table`
322
""")
323
result = cursor.fetchone()
324
325
except ProgrammingError as e:
326
print(f"SQL Programming Error: {e}")
327
328
except OperationalError as e:
329
print(f"Operational Error: {e}")
330
331
except DatabaseError as e:
332
print(f"Database Error: {e}")
333
334
except Error as e:
335
print(f"General DB Error: {e}")
336
```
337
338
### Integration with pandas
339
340
```python
341
import pandas as pd
342
from google.cloud.bigquery import dbapi
343
344
# Create connection
345
connection = dbapi.connect()
346
347
# Use pandas with DB-API connection
348
query = """
349
SELECT date, revenue, orders
350
FROM `my_project.my_dataset.daily_sales`
351
WHERE date >= '2023-01-01'
352
ORDER BY date
353
"""
354
355
# Read data into pandas DataFrame
356
df = pd.read_sql(query, connection)
357
print(df.head())
358
359
# Basic analysis
360
print(f"Total revenue: ${df['revenue'].sum():,.2f}")
361
print(f"Average daily orders: {df['orders'].mean():.1f}")
362
363
connection.close()
364
```
365
366
### Context Manager Usage
367
368
```python
369
# Use connection as context manager
370
with dbapi.connect() as connection:
371
with connection.cursor() as cursor:
372
# Execute multiple queries in transaction context
373
cursor.execute("""
374
CREATE OR REPLACE TABLE `my_project.my_dataset.temp_analysis` AS
375
SELECT
376
customer_id,
377
COUNT(*) as order_count,
378
SUM(total_amount) as total_spent
379
FROM `my_project.my_dataset.orders`
380
WHERE order_date >= '2023-01-01'
381
GROUP BY customer_id
382
""")
383
384
cursor.execute("""
385
SELECT customer_id, total_spent
386
FROM `my_project.my_dataset.temp_analysis`
387
WHERE total_spent > 1000
388
ORDER BY total_spent DESC
389
LIMIT 10
390
""")
391
392
# Fetch high-value customers
393
high_value_customers = cursor.fetchall()
394
for customer_id, total_spent in high_value_customers:
395
print(f"Customer {customer_id}: ${total_spent:,.2f}")
396
397
# Connection automatically closed when exiting context
398
```
399
400
### Custom Connection Configuration
401
402
```python
403
from google.cloud import bigquery
404
from google.cloud.bigquery import dbapi
405
406
# Create custom BigQuery client
407
client = bigquery.Client(
408
project="my-project",
409
location="US"
410
)
411
412
# Create connection with custom client
413
connection = dbapi.connect(client=client)
414
415
# Use connection with custom configuration
416
cursor = connection.cursor()
417
cursor.execute("""
418
SELECT
419
table_name,
420
row_count,
421
size_bytes
422
FROM `my-project.information_schema.table_storage`
423
WHERE table_schema = 'my_dataset'
424
ORDER BY size_bytes DESC
425
""")
426
427
# Show table sizes
428
print("Dataset table sizes:")
429
for table_name, row_count, size_bytes in cursor.fetchmany(10):
430
size_mb = size_bytes / (1024 * 1024) if size_bytes else 0
431
print(f" {table_name}: {row_count:,} rows, {size_mb:.2f} MB")
432
433
cursor.close()
434
connection.close()
435
```
436
437
### Streaming Results
438
439
```python
440
# Handle large result sets with streaming
441
cursor.execute("""
442
SELECT user_id, event_timestamp, event_type
443
FROM `my_project.my_dataset.user_events`
444
WHERE event_date = CURRENT_DATE()
445
ORDER BY event_timestamp
446
""")
447
448
# Process results in batches to manage memory
449
batch_size = 1000
450
total_processed = 0
451
452
while True:
453
batch = cursor.fetchmany(batch_size)
454
if not batch:
455
break
456
457
# Process batch
458
for user_id, timestamp, event_type in batch:
459
# Process individual event
460
print(f"User {user_id}: {event_type} at {timestamp}")
461
462
total_processed += len(batch)
463
print(f"Processed {total_processed} events so far...")
464
465
print(f"Total events processed: {total_processed}")
466
```