Google BigQuery API client library for Python providing comprehensive data warehouse and analytics capabilities
—
Python Database API specification compliance for SQL database compatibility. This interface enables BigQuery to work with database tools, ORMs, and applications that expect standard Python database connectivity.
Establish and manage database connections to BigQuery with authentication and configuration.
def connect(
    client: Optional[bigquery.Client] = None,
    bqstorage_client: Optional[bigquery_storage.BigQueryReadClient] = None,
    prefer_bqstorage_client: bool = True,
    **kwargs
) -> Connection:
    """
    Create a DB-API (PEP 249) connection to BigQuery.

    Args:
        client: BigQuery client instance. If None, creates default client
            using application-default credentials.
        bqstorage_client: BigQuery Storage client for faster data transfer.
        prefer_bqstorage_client: Prefer Storage API when available.
        **kwargs: Additional connection parameters.

    Returns:
        Connection: DB-API connection object.
    """
class Connection:
    """DB-API 2.0 connection to BigQuery.

    NOTE(review): BigQuery has no client-side transactions here, so
    commit()/rollback() exist only to satisfy the DB-API contract.
    """

    def __init__(self, client: Optional[bigquery.Client] = None, **kwargs): ...

    def close(self) -> None:
        """Close the connection."""

    def commit(self) -> None:
        """Commit current transaction (no-op for BigQuery)."""

    def rollback(self) -> None:
        """Rollback current transaction (no-op for BigQuery)."""

    def cursor(self) -> Cursor:
        """Create a new cursor object."""

    @property
    def closed(self) -> bool:
"""True if connection is closed."""Execute SQL statements and fetch results using the standard DB-API cursor interface.
class Cursor:
    """DB-API 2.0 cursor for executing queries and fetching results."""

    def __init__(self, connection: Connection): ...

    def close(self) -> None:
        """Close the cursor."""

    def execute(self, query: str, parameters: Any = None) -> None:
        """
        Execute a SQL query.

        Args:
            query: SQL query string, may contain parameter placeholders
                (pyformat style, e.g. %(name)s).
            parameters: Query parameters (dict or sequence).
        """

    def executemany(self, query: str, parameters_list: List[Any]) -> None:
        """
        Execute a SQL query multiple times with different parameters.

        Args:
            query: SQL query string with parameter placeholders.
            parameters_list: List of parameter sets, one per execution.
        """

    def fetchone(self) -> Optional[Tuple[Any, ...]]:
        """
        Fetch next row from query results.

        Returns:
            Tuple: Row data as tuple, or None if no more rows.
        """

    def fetchmany(self, size: Optional[int] = None) -> List[Tuple[Any, ...]]:
        """
        Fetch multiple rows from query results.

        Args:
            size: Number of rows to fetch. If None, uses arraysize.

        Returns:
            List[Tuple]: List of row tuples (may be shorter than size
                when the result set is exhausted).
        """

    def fetchall(self) -> List[Tuple[Any, ...]]:
        """
        Fetch all remaining rows from query results.

        Returns:
            List[Tuple]: List of all row tuples.
        """

    def setinputsizes(self, sizes: List[int]) -> None:
        """Set input parameter sizes (no-op for BigQuery)."""

    def setoutputsize(self, size: int, column: Optional[int] = None) -> None:
        """Set output column buffer size (no-op for BigQuery)."""

    @property
    def description(self) -> List[Tuple[str, str, None, None, None, None, None]]:
        """Column descriptions for current result set.

        Each entry is the DB-API 7-tuple; only name and type_code are
        populated here, the remaining five slots are None.
        """

    @property
    def rowcount(self) -> int:
        """Number of rows affected by last operation."""

    @property
    def arraysize(self) -> int:
        """Default number of rows fetchmany() should return."""

    @arraysize.setter
    def arraysize(self, value: int): ...

Standard DB-API module-level constants and metadata.
# API compliance level (DB-API 2.0, PEP 249)
apilevel: str = "2.0"
# Thread safety level (2 = threads may share module and connections)
threadsafety: int = 2
# Parameter style (pyformat = %(name)s style)
paramstyle: str = "pyformat"

Standard DB-API exception classes for error handling.
# DB-API 2.0 exception hierarchy: Error is the root of all database
# errors; catch subclasses before their bases (most specific first).
class Warning(Exception):
    """Warning exception for non-fatal issues."""

class Error(Exception):
    """Base exception for all database-related errors."""

class InterfaceError(Error):
    """Exception for database interface errors."""

class DatabaseError(Error):
    """Exception for database-related errors."""

class DataError(DatabaseError):
    """Exception for data processing errors."""

class OperationalError(DatabaseError):
    """Exception for operational database errors."""

class IntegrityError(DatabaseError):
    """Exception for data integrity constraint violations."""

class InternalError(DatabaseError):
    """Exception for internal database errors."""

class ProgrammingError(DatabaseError):
    """Exception for programming errors in SQL or API usage."""

class NotSupportedError(DatabaseError):
"""Exception for unsupported database features."""DB-API type constructors and type constants for data handling.
def Binary(data: bytes) -> bytes:
    """Construct binary data object."""

def Date(year: int, month: int, day: int) -> datetime.date:
    """Construct date object."""

def DateFromTicks(ticks: float) -> datetime.date:
    """Construct date from timestamp (seconds since the epoch)."""

def Time(hour: int, minute: int, second: int) -> datetime.time:
    """Construct time object."""

def TimeFromTicks(ticks: float) -> datetime.time:
    """Construct time from timestamp (seconds since the epoch)."""

def Timestamp(year: int, month: int, day: int, hour: int, minute: int, second: int) -> datetime.datetime:
    """Construct timestamp object."""

def TimestampFromTicks(ticks: float) -> datetime.datetime:
    """Construct timestamp from timestamp (seconds since the epoch)."""

# Type constants for column type identification (compared against
# cursor.description type_code values)
BINARY: str = "BINARY"
DATETIME: str = "DATETIME"
NUMBER: str = "NUMBER"
ROWID: str = "ROWID"
STRING: str = "STRING"

from google.cloud.bigquery import dbapi
# Create connection using default credentials
connection = dbapi.connect()
# Create cursor
cursor = connection.cursor()
# Execute simple query
cursor.execute("""
SELECT name, age, city
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = 'CA'
LIMIT 10
""")
# Fetch results (materializes all rows in memory)
rows = cursor.fetchall()
for row in rows:
    print(f"Name: {row[0]}, Age: {row[1]}, City: {row[2]}")
# Clean up
cursor.close()
connection.close()

# Query with parameters using pyformat style
# Named parameters are bound to %(name)s placeholders from the dict
cursor.execute("""
SELECT product_id, product_name, price
FROM `my_project.my_dataset.products`
WHERE category = %(category)s
AND price BETWEEN %(min_price)s AND %(max_price)s
ORDER BY price DESC
LIMIT %(limit)s
""", {
    'category': 'Electronics',
    'min_price': 100.00,
    'max_price': 1000.00,
    'limit': 20
})
# Process results one by one
while True:
    row = cursor.fetchone()
    if row is None:
        break
print(f"Product: {row[1]} - ${row[2]}")# Insert multiple rows using executemany
insert_query = """
INSERT INTO `my_project.my_dataset.users` (name, email, age)
VALUES (%(name)s, %(email)s, %(age)s)
"""
users_data = [
    {'name': 'Alice', 'email': 'alice@example.com', 'age': 30},
    {'name': 'Bob', 'email': 'bob@example.com', 'age': 25},
    {'name': 'Charlie', 'email': 'charlie@example.com', 'age': 35},
]
# Each dict in users_data is bound to the placeholders in turn
cursor.executemany(insert_query, users_data)
print(f"Inserted {cursor.rowcount} rows")# Execute query and examine result metadata
cursor.execute("""
SELECT user_id, name, email, created_at, last_login
FROM `my_project.my_dataset.users`
LIMIT 5
""")
# Print column information from the DB-API description 7-tuples
# (only name and type_code are populated)
print("Column descriptions:")
for i, desc in enumerate(cursor.description):
    column_name = desc[0]
    column_type = desc[1]
    print(f" {i}: {column_name} ({column_type})")
# Fetch data with column awareness
results = cursor.fetchall()
for row in results:
print(f"User ID: {row[0]}, Name: {row[1]}, Email: {row[2]}")from google.cloud.bigquery.dbapi import (
Error, DatabaseError, ProgrammingError, OperationalError
)
try:
    cursor.execute("""
SELECT COUNT(*) FROM `nonexistent.dataset.table`
""")
    result = cursor.fetchone()
except ProgrammingError as e:
    # Invalid SQL or reference to an object that does not exist
    print(f"SQL Programming Error: {e}")
except OperationalError as e:
    print(f"Operational Error: {e}")
except DatabaseError as e:
    # Base of the Data/Operational/Integrity/... subtree; checked after
    # the more specific handlers above
    print(f"Database Error: {e}")
except Error as e:
print(f"General DB Error: {e}")import pandas as pd
from google.cloud.bigquery import dbapi

# Create connection
connection = dbapi.connect()
# Use pandas with DB-API connection
query = """
SELECT date, revenue, orders
FROM `my_project.my_dataset.daily_sales`
WHERE date >= '2023-01-01'
ORDER BY date
"""
# Read data into pandas DataFrame via the DB-API connection
df = pd.read_sql(query, connection)
print(df.head())
# Basic analysis
print(f"Total revenue: ${df['revenue'].sum():,.2f}")
print(f"Average daily orders: {df['orders'].mean():.1f}")
connection.close()

# Use connection as context manager
# Both connection and cursor support the context-manager protocol,
# so they are closed automatically on exit
with dbapi.connect() as connection:
    with connection.cursor() as cursor:
        # Execute multiple queries in transaction context
        cursor.execute("""
CREATE OR REPLACE TABLE `my_project.my_dataset.temp_analysis` AS
SELECT
customer_id,
COUNT(*) as order_count,
SUM(total_amount) as total_spent
FROM `my_project.my_dataset.orders`
WHERE order_date >= '2023-01-01'
GROUP BY customer_id
""")
        cursor.execute("""
SELECT customer_id, total_spent
FROM `my_project.my_dataset.temp_analysis`
WHERE total_spent > 1000
ORDER BY total_spent DESC
LIMIT 10
""")
        # Fetch high-value customers
        high_value_customers = cursor.fetchall()
        for customer_id, total_spent in high_value_customers:
            print(f"Customer {customer_id}: ${total_spent:,.2f}")
# Connection automatically closed when exiting context

from google.cloud import bigquery
from google.cloud.bigquery import dbapi

# Create custom BigQuery client (explicit project and location)
client = bigquery.Client(
    project="my-project",
    location="US"
)
# Create connection with custom client
connection = dbapi.connect(client=client)
# Use connection with custom configuration
cursor = connection.cursor()
cursor.execute("""
SELECT
table_name,
row_count,
size_bytes
FROM `my-project.information_schema.table_storage`
WHERE table_schema = 'my_dataset'
ORDER BY size_bytes DESC
""")
# Show table sizes
print("Dataset table sizes:")
for table_name, row_count, size_bytes in cursor.fetchmany(10):
    # size_bytes may be NULL/None; guard before dividing
    size_mb = size_bytes / (1024 * 1024) if size_bytes else 0
    print(f" {table_name}: {row_count:,} rows, {size_mb:.2f} MB")
cursor.close()
connection.close()

# Handle large result sets with streaming
cursor.execute("""
SELECT user_id, event_timestamp, event_type
FROM `my_project.my_dataset.user_events`
WHERE event_date = CURRENT_DATE()
ORDER BY event_timestamp
""")
# Process results in batches to manage memory: fetchmany() avoids
# materializing the full result set the way fetchall() would
batch_size = 1000
total_processed = 0
while True:
    batch = cursor.fetchmany(batch_size)
    if not batch:
        break
    # Process batch
    for user_id, timestamp, event_type in batch:
        # Process individual event
        print(f"User {user_id}: {event_type} at {timestamp}")
    total_processed += len(batch)
    print(f"Processed {total_processed} events so far...")
print(f"Total events processed: {total_processed}")Install with Tessl CLI
npx tessl i tessl/pypi-google-cloud-bigquery