Google BigQuery API client library for Python providing comprehensive data warehouse and analytics capabilities.

```
npx @tessl/cli install tessl/pypi-google-cloud-bigquery@3.36.0
```

This library enables developers to interact with Google's cloud-based data warehouse, perform SQL queries on massive datasets, manage BigQuery resources, and integrate with the broader Google Cloud ecosystem.

Install from PyPI:

```
pip install google-cloud-bigquery
```

```python
from google.cloud import bigquery
```

Main client and commonly used classes:

```python
from google.cloud.bigquery import Client, Dataset, Table, QueryJob
```

Import specific components as needed:

```python
from google.cloud.bigquery import (
    SchemaField, LoadJob, ExtractJob,
    QueryJobConfig, LoadJobConfig,
)
```
Quick start: initialize a client, run a query, and iterate over the results.

```python
from google.cloud import bigquery

# Initialize the client
client = bigquery.Client()

# Simple query example
query = """
    SELECT name, COUNT(*) AS count
    FROM `bigquery-public-data.usa_names.usa_1910_2013`
    WHERE state = 'TX'
    GROUP BY name
    ORDER BY count DESC
    LIMIT 10
"""

# Execute the query and wait for the results
query_job = client.query(query)
results = query_job.result()

# Process results
for row in results:
    print(f"{row.name}: {row.count}")
```
Creating a dataset and a table:

```python
# Working with datasets and tables
dataset_id = "my_dataset"
table_id = "my_table"

# Create the dataset (no error if it already exists)
dataset = bigquery.Dataset(f"{client.project}.{dataset_id}")
dataset = client.create_dataset(dataset, exists_ok=True)

# Define the table schema
schema = [
    bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
    bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
]

# Create the table (no error if it already exists)
table = bigquery.Table(f"{client.project}.{dataset_id}.{table_id}", schema=schema)
table = client.create_table(table, exists_ok=True)
```

The BigQuery client library follows a hierarchical resource model:

- a project contains datasets and owns jobs (queries, loads, extracts);
- a dataset contains tables, models, and routines;
- a table holds rows described by a schema of `SchemaField` objects.
The library integrates seamlessly with pandas, PyArrow, and other data science tools, supports both synchronous and asynchronous operations, and provides comprehensive error handling and retry mechanisms.
Core client functionality for authentication, project management, and resource operations. `Client` is the main entry point for all BigQuery interactions.

```python
class Client:
    def __init__(self, project: str = None, credentials: Any = None, **kwargs): ...
    def query(self, query: str, job_config: QueryJobConfig = None, **kwargs) -> QueryJob: ...
    def get_dataset(self, dataset_ref: str) -> Dataset: ...
    def create_dataset(self, dataset: Dataset, **kwargs) -> Dataset: ...
    def delete_dataset(self, dataset_ref: str, **kwargs) -> None: ...
    def list_datasets(self, **kwargs) -> Iterator[Dataset]: ...
```
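A minimal sketch of typical client usage, assuming default application credentials and a placeholder dataset name (`my_dataset`):

```python
from google.cloud import bigquery

# Project and credentials are resolved from the environment
# (GOOGLE_APPLICATION_CREDENTIALS, gcloud auth, or the metadata server).
client = bigquery.Client()

# Enumerate datasets in the client's project.
for item in client.list_datasets():
    print(item.dataset_id)

# Fetch full metadata for one dataset ("my_dataset" is a placeholder).
dataset = client.get_dataset(f"{client.project}.my_dataset")
print(dataset.location, dataset.created)
```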
SQL query execution with parameters, job configuration, and result processing. Supports both simple queries and complex analytical workloads, with pagination and streaming of results.

```python
class QueryJob:
    def result(self, **kwargs) -> RowIterator: ...
    def to_dataframe(self, **kwargs) -> pandas.DataFrame: ...
    def to_arrow(self, **kwargs) -> pyarrow.Table: ...

class QueryJobConfig:
    def __init__(self, **kwargs): ...
```
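A sketch of running a configured query and materializing the results as a DataFrame (assumes `pandas` and `db-dtypes` are installed, which `to_dataframe()` requires):

```python
from google.cloud import bigquery

client = bigquery.Client()

# Run at batch priority and bypass the query cache.
job_config = bigquery.QueryJobConfig(
    priority=bigquery.QueryPriority.BATCH,
    use_query_cache=False,
)
query_job = client.query(
    "SELECT word, word_count FROM `bigquery-public-data.samples.shakespeare` LIMIT 5",
    job_config=job_config,
)

df = query_job.to_dataframe()  # blocks until the job finishes
print(df.head())
```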
Dataset creation, configuration, access control, and metadata management. Datasets serve as containers for tables and other BigQuery resources.

```python
class Dataset:
    def __init__(self, dataset_ref: str): ...

class DatasetReference:
    def __init__(self, project: str, dataset_id: str): ...

class AccessEntry:
    def __init__(self, role: str, entity_type: str, entity_id: str): ...
```
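As an illustration of access control, a hedged sketch of granting a reader via `AccessEntry` (the dataset name and email address are placeholders):

```python
from google.cloud import bigquery

client = bigquery.Client()
dataset = client.get_dataset(f"{client.project}.my_dataset")  # placeholder dataset

# Append a reader grant, then push only the changed field to the API.
entry = bigquery.AccessEntry(
    role="READER",
    entity_type="userByEmail",
    entity_id="analyst@example.com",  # placeholder principal
)
dataset.access_entries = list(dataset.access_entries) + [entry]
dataset = client.update_dataset(dataset, ["access_entries"])
```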
Table creation, schema management, data loading, and metadata operations. Includes support for partitioning, clustering, and various table types.

```python
class Table:
    def __init__(self, table_ref: str, schema: List[SchemaField] = None): ...

class TableReference:
    def __init__(self, dataset_ref: DatasetReference, table_id: str): ...

class Row:
    def values(self) -> List[Any]: ...
    def keys(self) -> List[str]: ...
```
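A short sketch of reading `Row` objects straight from table storage with `Client.list_rows`, which avoids running a query job:

```python
from google.cloud import bigquery

client = bigquery.Client()

# Read a public table directly; no query job is created.
table = client.get_table("bigquery-public-data.usa_names.usa_1910_2013")

for row in client.list_rows(table, max_results=5):
    # Rows support attribute access, key access, and .keys()/.values().
    print(row.name, row["state"])
```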
Loading data from various sources, including local files, Cloud Storage, and streaming inserts, plus data export. Supports multiple formats and transformation options.

```python
class LoadJob:
    def result(self, **kwargs) -> LoadJob: ...

class LoadJobConfig:
    def __init__(self, **kwargs): ...
    source_format: SourceFormat
    schema: List[SchemaField]
    write_disposition: WriteDisposition

class ExtractJob:
    def result(self, **kwargs) -> ExtractJob: ...

class ExtractJobConfig:
    def __init__(self, **kwargs): ...
    destination_format: DestinationFormat
```
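A hedged sketch of a Cloud Storage round trip: load a CSV into a table, then export it back out. The bucket, object paths, and table name are placeholders:

```python
from google.cloud import bigquery

client = bigquery.Client()
table_id = f"{client.project}.my_dataset.my_table"  # placeholder table

# Load a headered CSV from Cloud Storage, appending to the table.
load_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,
    autodetect=True,
    write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
)
load_job = client.load_table_from_uri(
    "gs://my-bucket/data.csv", table_id, job_config=load_config
)
load_job.result()  # wait for completion

# Export the table as gzipped CSV shards.
extract_config = bigquery.ExtractJobConfig(
    destination_format=bigquery.DestinationFormat.CSV,
    compression="GZIP",
)
extract_job = client.extract_table(
    table_id, "gs://my-bucket/export-*.csv.gz", job_config=extract_config
)
extract_job.result()
```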
Type-safe schema definition with field specifications, modes, and descriptions. Essential for table creation and data validation.

```python
class SchemaField:
    def __init__(self, name: str, field_type: str, mode: str = "NULLABLE", **kwargs): ...

class FieldElementType:
    def __init__(self, element_type: str): ...

class PolicyTagList:
    def __init__(self, names: List[str]): ...
```
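Nested and repeated structures are expressed with `RECORD` fields whose `fields` argument carries the sub-schema; a brief sketch (the table name is a placeholder):

```python
from google.cloud import bigquery

# A REPEATED RECORD models an array of structs (here, multiple addresses).
schema = [
    bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField(
        "addresses",
        "RECORD",
        mode="REPEATED",
        fields=[
            bigquery.SchemaField("street", "STRING"),
            bigquery.SchemaField("city", "STRING"),
            bigquery.SchemaField("zip", "STRING"),
        ],
    ),
]

client = bigquery.Client()
table = bigquery.Table(f"{client.project}.my_dataset.people", schema=schema)
table = client.create_table(table, exists_ok=True)
```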
Type-safe parameter binding for SQL queries, supporting scalar, array, struct, and range parameter types with proper type validation.

```python
class ScalarQueryParameter:
    def __init__(self, name: str, type_: str, value: Any): ...

class ArrayQueryParameter:
    def __init__(self, name: str, array_type: str, values: List[Any]): ...

class StructQueryParameter:
    def __init__(self, name: str, *sub_params): ...
```
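A sketch of binding named parameters through `QueryJobConfig.query_parameters`; in the SQL text, parameters are referenced with `@name` syntax:

```python
from google.cloud import bigquery

client = bigquery.Client()

job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter("state", "STRING", "TX"),
        bigquery.ArrayQueryParameter("names", "STRING", ["James", "John"]),
    ]
)
query = """
    SELECT name, SUM(number) AS total
    FROM `bigquery-public-data.usa_names.usa_1910_2013`
    WHERE state = @state AND name IN UNNEST(@names)
    GROUP BY name
"""
for row in client.query(query, job_config=job_config).result():
    print(row.name, row.total)
```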
Python Database API (DB-API 2.0) specification compliance for SQL database compatibility. Enables use with database tools and ORMs.

```python
def connect(client: Client = None, **kwargs) -> Connection: ...

class Connection:
    def cursor(self) -> Cursor: ...
    def commit(self) -> None: ...
    def close(self) -> None: ...

class Cursor:
    def execute(self, query: str, parameters: Any = None) -> None: ...
    def fetchall(self) -> List[Any]: ...
```
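A minimal DB-API sketch; the cursor accepts pyformat placeholders (`%(name)s`), which the driver translates into BigQuery query parameters:

```python
from google.cloud import bigquery
from google.cloud.bigquery import dbapi

connection = dbapi.connect(bigquery.Client())
cursor = connection.cursor()

cursor.execute(
    "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` "
    "WHERE state = %(state)s LIMIT 5",
    {"state": "TX"},
)
for row in cursor.fetchall():
    print(row[0])

cursor.close()
connection.close()
```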
BigQuery ML model management and user-defined functions (UDFs). Supports model creation, training, evaluation, and stored procedures.

```python
class Model:
    def __init__(self, model_ref: Union[str, ModelReference]): ...

class ModelReference:
    def __init__(self, project: str, dataset_id: str, model_id: str): ...

class Routine:
    def __init__(self, routine_ref: Union[str, RoutineReference], routine_type: str = None): ...

class RoutineReference:
    def __init__(self, project: str, dataset_id: str, routine_id: str): ...

class RoutineArgument:
    def __init__(self, name: str = None, argument_kind: str = None, mode: str = None, data_type: StandardSqlDataType = None): ...
```
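A sketch of inspecting models and routines in a dataset (assumes the placeholder dataset already contains BigQuery ML models created with `CREATE MODEL`):

```python
from google.cloud import bigquery

client = bigquery.Client()
dataset_ref = bigquery.DatasetReference(client.project, "my_dataset")  # placeholder

# Models trained with CREATE MODEL statements are listed here.
for model in client.list_models(dataset_ref):
    print(model.reference.model_id, model.model_type)

# Routines cover SQL/JavaScript UDFs and stored procedures alike.
for routine in client.list_routines(dataset_ref):
    print(routine.reference.routine_id, routine.type_)
```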
```python
# Enums for job and table configuration
class SourceFormat:
    CSV: str
    NEWLINE_DELIMITED_JSON: str
    AVRO: str
    PARQUET: str
    ORC: str

class WriteDisposition:
    WRITE_EMPTY: str
    WRITE_TRUNCATE: str
    WRITE_APPEND: str

class CreateDisposition:
    CREATE_IF_NEEDED: str
    CREATE_NEVER: str

class QueryPriority:
    BATCH: str
    INTERACTIVE: str

# Exception classes
class LegacyBigQueryStorageError(Exception): ...
class LegacyPandasError(Exception): ...
class LegacyPyarrowError(Exception): ...

# Retry configuration
DEFAULT_RETRY: Retry
```
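For retry tuning, a hedged sketch using `google.api_core.retry.Retry` semantics; `with_deadline` returns a modified copy of the default policy (on newer `google-api-core` versions it is an alias of `with_timeout`):

```python
from google.cloud import bigquery
from google.cloud.bigquery.retry import DEFAULT_RETRY

client = bigquery.Client()

# Cap retries of the underlying API call at 30 seconds.
short_retry = DEFAULT_RETRY.with_deadline(30.0)
dataset = client.get_dataset(f"{client.project}.my_dataset", retry=short_retry)
```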