Google BigQuery API client library for Python, providing comprehensive data warehouse and analytics capabilities.

Core client functionality for authentication, project management, and resource operations. The Client class serves as the main entry point for all BigQuery interactions, providing authenticated access to Google Cloud BigQuery services. It is the primary interface to BigQuery, created with authentication and configuration options.
class Client:
# NOTE(review): this spec uses flat indentation; entries below are method
# signatures with docstrings only (interface documentation, not bodies).
def __init__(
self,
project: Optional[str] = None,
credentials: Optional[google.auth.credentials.Credentials] = None,
_http: Optional[requests.Session] = None,
location: Optional[str] = None,
default_query_job_config: Optional[QueryJobConfig] = None,
default_load_job_config: Optional[LoadJobConfig] = None,
client_info: Optional[google.api_core.client_info.ClientInfo] = None,
client_options: Optional[google.api_core.client_options.ClientOptions] = None,
):
"""
Initialize BigQuery client.
Args:
project: Google Cloud project ID. If None, inferred from environment.
credentials: OAuth2 credentials. If None, uses default credentials.
_http: Preconfigured HTTP session used for API calls (private/advanced use).
location: Default location for BigQuery operations.
default_query_job_config: Default configuration for query jobs.
default_load_job_config: Default configuration for load jobs.
client_info: Client library information.
client_options: Client configuration options.
"""
@property
# Read-only accessor (no setter is declared in this spec).
def project(self) -> str:
"""Project ID associated with this client."""
@property
def location(self) -> str:
"""Default location for BigQuery operations."""Execute SQL queries and manage query jobs with comprehensive configuration options.
def query(
self,
query: str,
job_config: Optional[QueryJobConfig] = None,
job_id: Optional[str] = None,
job_retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
location: Optional[str] = None,
project: Optional[str] = None,
) -> QueryJob:
"""
Execute a SQL query and return a job.
The job is started but not awaited; use query_and_wait() to block
until completion and obtain results directly.
Args:
query: SQL query string to execute.
job_config: Configuration for the query job.
job_id: Unique identifier for the job.
job_retry: Retry configuration for job creation.
timeout: Timeout in seconds for job creation.
location: Location where job should run.
project: Project ID for the job.
Returns:
QueryJob: Job instance for the query operation.
"""
def query_and_wait(
self,
query: str,
**kwargs
) -> google.cloud.bigquery.table.RowIterator:
"""
Execute query and wait for completion, returning results directly.
Args:
query: SQL query string to execute.
**kwargs: Additional arguments passed to query().
Returns:
RowIterator: Query results.
"""Manage BigQuery datasets including creation, deletion, and listing operations.
def create_dataset(
self,
dataset: Union[Dataset, DatasetReference, str],
exists_ok: bool = False,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> Dataset:
"""
Create a new dataset.
Args:
dataset: Dataset to create.
exists_ok: If True, do not raise error if dataset already exists.
If False, creating an existing dataset is an error.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Dataset: The created dataset.
"""
def delete_dataset(
self,
dataset: Union[Dataset, DatasetReference, str],
delete_contents: bool = False,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> None:
"""
Delete a dataset.
Args:
dataset: Dataset to delete.
delete_contents: If True, delete all tables in dataset.
retry: Retry configuration.
timeout: Timeout in seconds.
"""
def get_dataset(
self,
dataset_ref: Union[DatasetReference, str],
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> Dataset:
"""
Fetch dataset metadata.
Args:
dataset_ref: Reference to dataset to fetch.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Dataset: The requested dataset.
"""
def list_datasets(
self,
project: Optional[str] = None,
include_all: bool = False,
filter: Optional[str] = None,  # shadows builtin filter(); name kept for API compatibility
max_results: Optional[int] = None,
page_token: Optional[str] = None,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> google.api_core.page_iterator.Iterator[Dataset]:
"""
List datasets in a project.
Args:
project: Project ID to list datasets from.
include_all: Include hidden datasets.
filter: Label filter expression.
max_results: Maximum datasets to return.
page_token: Token for pagination.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Iterator[Dataset]: Iterator of datasets.
"""
def update_dataset(
self,
dataset: Dataset,
fields: List[str],
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: float = None,
) -> Dataset:
"""
Update dataset metadata.
Args:
dataset: Dataset with updated metadata.
fields: Fields to update.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Dataset: Updated dataset.
"""Manage BigQuery tables including creation, deletion, and metadata operations.
def create_table(
self,
table: Union[Table, TableReference, str],
exists_ok: bool = False,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> Table:
"""
Create a new table.
Args:
table: Table to create.
exists_ok: If True, do not raise error if table already exists.
If False, creating an existing table is an error.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Table: The created table.
"""
def delete_table(
self,
table: Union[Table, TableReference, str],
not_found_ok: bool = False,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> None:
"""
Delete a table.
Args:
table: Table to delete.
not_found_ok: If True, do not raise error if table not found.
retry: Retry configuration.
timeout: Timeout in seconds.
"""
def get_table(
self,
table: Union[Table, TableReference, str],
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> Table:
"""
Fetch table metadata.
Args:
table: Reference to table to fetch.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Table: The requested table.
"""
def list_tables(
self,
dataset: Union[Dataset, DatasetReference, str],
max_results: Optional[int] = None,
page_token: Optional[str] = None,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> google.api_core.page_iterator.Iterator[Table]:
"""
List tables in a dataset.
Args:
dataset: Dataset to list tables from.
max_results: Maximum tables to return.
page_token: Token for pagination.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Iterator[Table]: Iterator of tables.
"""
def update_table(
self,
table: Table,
fields: List[str],
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: float = None,
) -> Table:
"""
Update table metadata.
Args:
table: Table with updated metadata.
fields: Fields to update.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Table: Updated table.
"""Monitor and control BigQuery jobs including queries, loads, extracts, and copies.
def get_job(
self,
job_id: str,
project: Optional[str] = None,
location: Optional[str] = None,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> Union[QueryJob, LoadJob, ExtractJob, CopyJob, UnknownJob]:
"""
Fetch job metadata.
Args:
job_id: Unique identifier for the job.
project: Project ID where job was created.
location: Location where job was created.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Job: The requested job instance; concrete type depends on the job kind.
"""
def list_jobs(
self,
project: Optional[str] = None,
parent_job: Optional[str] = None,
state_filter: Optional[str] = None,
min_creation_time: Optional[datetime.datetime] = None,
max_creation_time: Optional[datetime.datetime] = None,
max_results: Optional[int] = None,
page_token: Optional[str] = None,
all_users: Optional[bool] = None,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> google.api_core.page_iterator.Iterator:
"""
List jobs in a project.
Args:
project: Project ID to list jobs from.
parent_job: Parent job ID for script jobs.
state_filter: Filter by job state ('done', 'pending', 'running').
min_creation_time: Minimum job creation time.
max_creation_time: Maximum job creation time.
max_results: Maximum jobs to return.
page_token: Token for pagination.
all_users: Include jobs from all users.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Iterator: Iterator of job instances.
"""
def cancel_job(
self,
job_id: str,
project: str = None,
location: str = None,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: float = None,
) -> bool:
"""
Cancel a job.
Args:
job_id: Unique identifier for the job.
project: Project ID where job was created.
location: Location where job was created.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
bool: True if cancellation was successful.
"""Load and extract data to/from BigQuery tables with various configuration options.
def load_table_from_uri(
self,
source_uris: Union[str, List[str]],
destination: Union[Table, TableReference, str],
job_config: Optional[LoadJobConfig] = None,
**kwargs
) -> LoadJob:
"""
Load data from Cloud Storage URIs.
Args:
source_uris: Cloud Storage URIs to load from.
destination: Destination table.
job_config: Configuration for the load job.
**kwargs: Additional keyword arguments forwarded to job creation.
Returns:
LoadJob: Job instance for the load operation.
"""
def load_table_from_file(
self,
file_obj: typing.BinaryIO,
destination: Union[Table, TableReference, str],
rewind: bool = False,
size: Optional[int] = None,
num_retries: int = 6,
job_config: Optional[LoadJobConfig] = None,
**kwargs
) -> LoadJob:
"""
Load data from a file object.
Args:
file_obj: File-like object to load from.
destination: Destination table.
rewind: Whether to rewind file before loading.
size: Number of bytes to load.
num_retries: Number of upload retries.
job_config: Configuration for the load job.
**kwargs: Additional keyword arguments forwarded to job creation.
Returns:
LoadJob: Job instance for the load operation.
"""
def extract_table(
self,
source: Union[Table, TableReference, str],
destination_uris: Union[str, List[str]],
job_config: ExtractJobConfig = None,
**kwargs
) -> ExtractJob:
"""
Extract data from a table to Cloud Storage.
Args:
source: Source table to extract from.
destination_uris: Cloud Storage URIs to extract to.
job_config: Configuration for the extract job.
Returns:
ExtractJob: Job instance for the extract operation.
"""from google.cloud import bigquery
# --- Client construction ---------------------------------------------------
# Use default credentials and project
client = bigquery.Client()
# Specify project explicitly
client = bigquery.Client(project="my-project-id")
# Use service account credentials
from google.oauth2 import service_account
credentials = service_account.Credentials.from_service_account_file(
    "path/to/service-account-key.json"
)
client = bigquery.Client(credentials=credentials, project="my-project-id")

# --- Dataset and table management ------------------------------------------
# Create a dataset
dataset_id = "my_new_dataset"
dataset = bigquery.Dataset(f"{client.project}.{dataset_id}")
dataset.location = "US"
dataset = client.create_dataset(dataset, exists_ok=True)
# List all datasets
datasets = list(client.list_datasets())
for dataset in datasets:
    print(dataset.dataset_id)
# Create a table with schema
table_id = "my_new_table"
schema = [
    bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
]
table = bigquery.Table(f"{client.project}.{dataset_id}.{table_id}", schema=schema)
table = client.create_table(table, exists_ok=True)

# Install with Tessl CLI:
npx tessl i tessl/pypi-google-cloud-bigquery