Ctrl+K (search)
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-google-cloud-bigquery

Google BigQuery API client library for Python providing comprehensive data warehouse and analytics capabilities

Pending
Overview
Eval results
Files

docs/database-api.md

Database API (DB-API 2.0)

Python Database API specification compliance for SQL database compatibility. This interface enables BigQuery to work with database tools, ORMs, and applications that expect standard Python database connectivity.

Capabilities

Connection Management

Establish and manage database connections to BigQuery with authentication and configuration.

def connect(
    client: bigquery.Client = None,
    bqstorage_client: bigquery_storage.BigQueryReadClient = None,
    prefer_bqstorage_client: bool = True,
    **kwargs
) -> Connection:
    """
    Create a DB-API 2.0 connection to BigQuery.

    Args:
        client: BigQuery client instance. If None, a default client is
            created.
        bqstorage_client: BigQuery Storage read client used for faster
            bulk transfer of query results.
        prefer_bqstorage_client: Prefer the Storage API when available.
        **kwargs: Additional connection parameters.
            # NOTE(review): presumably forwarded to Connection — confirm.

    Returns:
        Connection: DB-API connection object wrapping the client(s).
    """

class Connection:
    """DB-API 2.0 connection to BigQuery.

    BigQuery does not support transactions, so commit() and rollback()
    exist only for interface compatibility and do nothing.
    """

    def __init__(self, client: bigquery.Client = None, **kwargs): ...

    def close(self) -> None:
        """Close the connection."""

    def commit(self) -> None:
        """Commit current transaction (no-op for BigQuery)."""

    def rollback(self) -> None:
        """Rollback current transaction (no-op for BigQuery)."""

    def cursor(self) -> Cursor:
        """Create a new cursor object bound to this connection."""

    @property
    def closed(self) -> bool:
        """True if connection is closed."""

Cursor Operations

Execute SQL statements and fetch results using the standard DB-API cursor interface.

class Cursor:
    """DB-API 2.0 cursor for executing SQL and fetching results.

    Obtained from Connection.cursor(); not normally constructed directly.
    """

    def __init__(self, connection: Connection): ...

    def close(self) -> None:
        """Close the cursor."""

    def execute(self, query: str, parameters: Any = None) -> None:
        """
        Execute a SQL query.

        Args:
            query: SQL query string, may contain parameter placeholders
                (pyformat style, e.g. %(name)s — see `paramstyle`).
            parameters: Query parameters (dict or sequence).
        """

    def executemany(self, query: str, parameters_list: List[Any]) -> None:
        """
        Execute a SQL query multiple times with different parameters.

        Args:
            query: SQL query string with parameter placeholders.
            parameters_list: List of parameter sets, one per execution.
        """

    def fetchone(self) -> Optional[Tuple[Any, ...]]:
        """
        Fetch next row from query results.

        Returns:
            Tuple: Row data as tuple, or None if no more rows.
        """

    def fetchmany(self, size: int = None) -> List[Tuple[Any, ...]]:
        """
        Fetch multiple rows from query results.

        Args:
            size: Number of rows to fetch. If None, uses arraysize.

        Returns:
            List[Tuple]: List of row tuples; may be shorter than `size`
            when the result set is exhausted.
        """

    def fetchall(self) -> List[Tuple[Any, ...]]:
        """
        Fetch all remaining rows from query results.

        Returns:
            List[Tuple]: List of all row tuples.
        """

    def setinputsizes(self, sizes: List[int]) -> None:
        """Set input parameter sizes (no-op for BigQuery)."""

    def setoutputsize(self, size: int, column: int = None) -> None:
        """Set output column buffer size (no-op for BigQuery)."""

    @property
    def description(self) -> List[Tuple[str, str, None, None, None, None, None]]:
        """Column descriptions for current result set.

        Each entry is the 7-item sequence mandated by PEP 249:
        (name, type_code, display_size, internal_size, precision,
        scale, null_ok). Per the annotation, only the first two
        positions carry values here; the rest are None.
        """

    # NOTE(review): many DB-API drivers report -1 for SELECT statements;
    # confirm what this returns for queries vs. DML.
    @property
    def rowcount(self) -> int:
        """Number of rows affected by last operation."""

    @property
    def arraysize(self) -> int:
        """Default number of rows fetchmany() should return."""

    @arraysize.setter
    def arraysize(self, value: int): ...

DB-API Constants

Standard DB-API module-level constants and metadata.

# DB-API 2.0 (PEP 249) compliance level.
apilevel: str = "2.0"

# Thread safety level. google-cloud-bigquery's DB-API declares level 1:
# threads may share the module, but NOT connections. (Level 2 — shared
# connections — is not what the library provides.)
threadsafety: int = 1

# Parameter style: pyformat, i.e. %(name)s named placeholders.
paramstyle: str = "pyformat"

Exception Hierarchy

Standard DB-API exception classes for error handling.

class Warning(Exception):
    """Raised for important warnings that do not abort the operation."""

class Error(Exception):
    """Root of the DB-API error hierarchy; catches any database failure."""

class InterfaceError(Error):
    """Raised for problems with the database interface itself."""

class DatabaseError(Error):
    """Raised for problems reported by the database."""

class DataError(DatabaseError):
    """Raised when processed data is invalid (bad value, range, etc.)."""

class OperationalError(DatabaseError):
    """Raised for failures in the database's own operation."""

class IntegrityError(DatabaseError):
    """Raised when a data integrity constraint is violated."""

class InternalError(DatabaseError):
    """Raised when the database hits an internal fault."""

class ProgrammingError(DatabaseError):
    """Raised for SQL mistakes or incorrect API usage."""

class NotSupportedError(DatabaseError):
    """Raised when a requested feature is unavailable in BigQuery."""

Type Constructors and Constants

DB-API type constructors and type constants for data handling.

import datetime  # required by the constructors and annotations below

def Binary(data: bytes) -> bytes:
    """Construct a binary data object from *data*."""
    return bytes(data)

def Date(year: int, month: int, day: int) -> datetime.date:
    """Construct a date object."""
    return datetime.date(year, month, day)

def DateFromTicks(ticks: float) -> datetime.date:
    """Construct a date from a Unix timestamp (seconds since the epoch)."""
    return datetime.date.fromtimestamp(ticks)

def Time(hour: int, minute: int, second: int) -> datetime.time:
    """Construct a time object."""
    return datetime.time(hour, minute, second)

def TimeFromTicks(ticks: float) -> datetime.time:
    """Construct a time from a Unix timestamp (seconds since the epoch)."""
    return datetime.datetime.fromtimestamp(ticks).time()

def Timestamp(year: int, month: int, day: int, hour: int, minute: int, second: int) -> datetime.datetime:
    """Construct a timestamp object."""
    return datetime.datetime(year, month, day, hour, minute, second)

def TimestampFromTicks(ticks: float) -> datetime.datetime:
    """Construct a timestamp from a Unix timestamp (seconds since the epoch)."""
    return datetime.datetime.fromtimestamp(ticks)

# Type constants for column type identification
BINARY: str = "BINARY"
DATETIME: str = "DATETIME"
NUMBER: str = "NUMBER"
ROWID: str = "ROWID"
STRING: str = "STRING"

Usage Examples

Basic Connection and Query

from google.cloud.bigquery import dbapi

# Create connection using default credentials
connection = dbapi.connect()

# Create cursor
cursor = connection.cursor()

# Execute simple query against a public dataset.
# NOTE(review): `usa_1910_2013` exposes state/gender/year/name/number;
# the `age` and `city` columns selected here may not exist — verify.
cursor.execute("""
    SELECT name, age, city
    FROM `bigquery-public-data.usa_names.usa_1910_2013`
    WHERE state = 'CA'
    LIMIT 10
""")

# Fetch results: fetchall() returns a list of row tuples
rows = cursor.fetchall()
for row in rows:
    print(f"Name: {row[0]}, Age: {row[1]}, City: {row[2]}")

# Clean up: close the cursor before the connection
cursor.close()
connection.close()

Parameterized Queries

# Continues the previous example: `cursor` must still be open.
# Query with parameters using pyformat style (%(name)s placeholders
# bound from the dict passed as the second argument).
cursor.execute("""
    SELECT product_id, product_name, price
    FROM `my_project.my_dataset.products`
    WHERE category = %(category)s
    AND price BETWEEN %(min_price)s AND %(max_price)s
    ORDER BY price DESC
    LIMIT %(limit)s
""", {
    'category': 'Electronics',
    'min_price': 100.00,
    'max_price': 1000.00,
    'limit': 20
})

# Process results one by one; fetchone() returns None when exhausted
while True:
    row = cursor.fetchone()
    if row is None:
        break
    print(f"Product: {row[1]} - ${row[2]}")

Batch Operations

# Insert multiple rows using executemany: the query runs once per
# parameter dict in the list.
insert_query = """
    INSERT INTO `my_project.my_dataset.users` (name, email, age)
    VALUES (%(name)s, %(email)s, %(age)s)
"""

users_data = [
    {'name': 'Alice', 'email': 'alice@example.com', 'age': 30},
    {'name': 'Bob', 'email': 'bob@example.com', 'age': 25},
    {'name': 'Charlie', 'email': 'charlie@example.com', 'age': 35},
]

cursor.executemany(insert_query, users_data)
# NOTE(review): confirm rowcount reflects total rows across all
# executions after executemany, not just the last one.
print(f"Inserted {cursor.rowcount} rows")

Working with Result Metadata

# Execute query and examine result metadata
cursor.execute("""
    SELECT user_id, name, email, created_at, last_login
    FROM `my_project.my_dataset.users`
    LIMIT 5
""")

# Print column information from cursor.description; each entry is a
# PEP 249 7-item sequence, of which name and type_code come first.
print("Column descriptions:")
for i, desc in enumerate(cursor.description):
    column_name = desc[0]
    column_type = desc[1]
    print(f"  {i}: {column_name} ({column_type})")

# Fetch data with column awareness
results = cursor.fetchall()
for row in results:
    print(f"User ID: {row[0]}, Name: {row[1]}, Email: {row[2]}")

Error Handling

from google.cloud.bigquery.dbapi import (
    Error, DatabaseError, ProgrammingError, OperationalError
)

# Handlers are ordered most-specific first: ProgrammingError and
# OperationalError both subclass DatabaseError, which subclasses Error,
# so the broader handlers only run for errors not caught above them.
try:
    cursor.execute("""
        SELECT COUNT(*) FROM `nonexistent.dataset.table`
    """)
    result = cursor.fetchone()

except ProgrammingError as e:
    print(f"SQL Programming Error: {e}")

except OperationalError as e:
    print(f"Operational Error: {e}")

except DatabaseError as e:
    print(f"Database Error: {e}")

except Error as e:
    print(f"General DB Error: {e}")

Integration with pandas

import pandas as pd
from google.cloud.bigquery import dbapi

# Create connection
connection = dbapi.connect()

# Use pandas with DB-API connection
query = """
    SELECT date, revenue, orders
    FROM `my_project.my_dataset.daily_sales`
    WHERE date >= '2023-01-01'
    ORDER BY date
"""

# Read data into pandas DataFrame.
# NOTE(review): pandas may emit a UserWarning for DB-API connections
# that are not SQLAlchemy connectables — verify acceptable here.
df = pd.read_sql(query, connection)
print(df.head())

# Basic analysis
print(f"Total revenue: ${df['revenue'].sum():,.2f}")
print(f"Average daily orders: {df['orders'].mean():.1f}")

connection.close()

Context Manager Usage

# Use connection as context manager.
# NOTE(review): the Connection/Cursor stubs above do not show
# __enter__/__exit__ — confirm the installed library version supports
# the context-manager protocol before relying on this pattern.
with dbapi.connect() as connection:
    with connection.cursor() as cursor:
        # Execute multiple queries in transaction context
        cursor.execute("""
            CREATE OR REPLACE TABLE `my_project.my_dataset.temp_analysis` AS
            SELECT 
                customer_id,
                COUNT(*) as order_count,
                SUM(total_amount) as total_spent
            FROM `my_project.my_dataset.orders`
            WHERE order_date >= '2023-01-01'
            GROUP BY customer_id
        """)

        cursor.execute("""
            SELECT customer_id, total_spent
            FROM `my_project.my_dataset.temp_analysis`
            WHERE total_spent > 1000
            ORDER BY total_spent DESC
            LIMIT 10
        """)

        # Fetch high-value customers; each row unpacks to (id, spent)
        high_value_customers = cursor.fetchall()
        for customer_id, total_spent in high_value_customers:
            print(f"Customer {customer_id}: ${total_spent:,.2f}")

Custom Connection Configuration

from google.cloud import bigquery
from google.cloud.bigquery import dbapi

# Create custom BigQuery client with explicit project and location
client = bigquery.Client(
    project="my-project",
    location="US"
)

# Create connection with custom client
connection = dbapi.connect(client=client)

# Use connection with custom configuration.
# NOTE(review): BigQuery INFORMATION_SCHEMA views are usually addressed
# as `region-us`.INFORMATION_SCHEMA.TABLE_STORAGE — verify this path.
cursor = connection.cursor()
cursor.execute("""
    SELECT 
        table_name,
        row_count,
        size_bytes
    FROM `my-project.information_schema.table_storage`
    WHERE table_schema = 'my_dataset'
    ORDER BY size_bytes DESC
""")

# Show table sizes; fetchmany(10) returns at most 10 row tuples
print("Dataset table sizes:")
for table_name, row_count, size_bytes in cursor.fetchmany(10):
    # Guard against NULL size_bytes before converting to MB
    size_mb = size_bytes / (1024 * 1024) if size_bytes else 0
    print(f"  {table_name}: {row_count:,} rows, {size_mb:.2f} MB")

cursor.close()
connection.close()

Streaming Results

# Handle large result sets with streaming: fetch in fixed-size batches
# so memory stays bounded regardless of total result size.
cursor.execute("""
    SELECT user_id, event_timestamp, event_type
    FROM `my_project.my_dataset.user_events`
    WHERE event_date = CURRENT_DATE()
    ORDER BY event_timestamp
""")

# Process results in batches to manage memory
batch_size = 1000
total_processed = 0

while True:
    batch = cursor.fetchmany(batch_size)
    # An empty batch means the result set is exhausted
    if not batch:
        break

    # Process batch
    for user_id, timestamp, event_type in batch:
        # Process individual event
        print(f"User {user_id}: {event_type} at {timestamp}")

    total_processed += len(batch)
    print(f"Processed {total_processed} events so far...")

print(f"Total events processed: {total_processed}")

Install with Tessl CLI

npx tessl i tessl/pypi-google-cloud-bigquery

docs

client-operations.md

data-loading.md

database-api.md

dataset-management.md

index.md

models-routines.md

query-operations.md

query-parameters.md

schema-definition.md

table-operations.md

tile.json