Google BigQuery API client library for Python, providing comprehensive data warehouse and analytics capabilities.

Core client functionality for authentication, project management, and resource operations. The Client class serves as the main entry point for all BigQuery interactions, providing authenticated access to Google Cloud BigQuery services. It is the primary interface to BigQuery, created with authentication and configuration options.
class Client:
# NOTE(review): this spec uses flat indentation; entries below are method
# signatures with docstrings only (interface documentation, not bodies).
def __init__(
self,
project: Optional[str] = None,
credentials: Optional[google.auth.credentials.Credentials] = None,
_http: Optional[requests.Session] = None,
location: Optional[str] = None,
default_query_job_config: Optional[QueryJobConfig] = None,
default_load_job_config: Optional[LoadJobConfig] = None,
client_info: Optional[google.api_core.client_info.ClientInfo] = None,
client_options: Optional[google.api_core.client_options.ClientOptions] = None,
):
"""
Initialize BigQuery client.
Args:
project: Google Cloud project ID. If None, inferred from environment.
credentials: OAuth2 credentials. If None, uses default credentials.
_http: Preconfigured HTTP session used for API calls (private/advanced use).
location: Default location for BigQuery operations.
default_query_job_config: Default configuration for query jobs.
default_load_job_config: Default configuration for load jobs.
client_info: Client library information.
client_options: Client configuration options.
"""
@property
# Read-only accessor (no setter is declared in this spec).
def project(self) -> str:
"""Project ID associated with this client."""
@property
def location(self) -> str:
"""Default location for BigQuery operations."""Execute SQL queries and manage query jobs with comprehensive configuration options.
def query(
self,
query: str,
job_config: Optional[QueryJobConfig] = None,
job_id: Optional[str] = None,
job_retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
location: Optional[str] = None,
project: Optional[str] = None,
) -> QueryJob:
"""
Execute a SQL query and return a job.
The job is started but not awaited; use query_and_wait() to block
until completion and obtain results directly.
Args:
query: SQL query string to execute.
job_config: Configuration for the query job.
job_id: Unique identifier for the job.
job_retry: Retry configuration for job creation.
timeout: Timeout in seconds for job creation.
location: Location where job should run.
project: Project ID for the job.
Returns:
QueryJob: Job instance for the query operation.
"""
def query_and_wait(
self,
query: str,
**kwargs
) -> google.cloud.bigquery.table.RowIterator:
"""
Execute query and wait for completion, returning results directly.
Args:
query: SQL query string to execute.
**kwargs: Additional arguments passed to query().
Returns:
RowIterator: Query results.
"""Manage BigQuery datasets including creation, deletion, and listing operations.
def create_dataset(
self,
dataset: Union[Dataset, DatasetReference, str],
exists_ok: bool = False,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> Dataset:
"""
Create a new dataset.
Args:
dataset: Dataset to create.
exists_ok: If True, do not raise error if dataset already exists.
If False, creating an existing dataset is an error.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Dataset: The created dataset.
"""
def delete_dataset(
self,
dataset: Union[Dataset, DatasetReference, str],
delete_contents: bool = False,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> None:
"""
Delete a dataset.
Args:
dataset: Dataset to delete.
delete_contents: If True, delete all tables in dataset.
retry: Retry configuration.
timeout: Timeout in seconds.
"""
def get_dataset(
self,
dataset_ref: Union[DatasetReference, str],
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> Dataset:
"""
Fetch dataset metadata.
Args:
dataset_ref: Reference to dataset to fetch.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Dataset: The requested dataset.
"""
def list_datasets(
self,
project: Optional[str] = None,
include_all: bool = False,
filter: Optional[str] = None,  # shadows builtin filter(); name kept for API compatibility
max_results: Optional[int] = None,
page_token: Optional[str] = None,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> google.api_core.page_iterator.Iterator[Dataset]:
"""
List datasets in a project.
Args:
project: Project ID to list datasets from.
include_all: Include hidden datasets.
filter: Label filter expression.
max_results: Maximum datasets to return.
page_token: Token for pagination.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Iterator[Dataset]: Iterator of datasets.
"""
def update_dataset(
self,
dataset: Dataset,
fields: List[str],
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: float = None,
) -> Dataset:
"""
Update dataset metadata.
Args:
dataset: Dataset with updated metadata.
fields: Fields to update.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Dataset: Updated dataset.
"""Manage BigQuery tables including creation, deletion, and metadata operations.
def create_table(
self,
table: Union[Table, TableReference, str],
exists_ok: bool = False,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> Table:
"""
Create a new table.
Args:
table: Table to create.
exists_ok: If True, do not raise error if table already exists.
If False, creating an existing table is an error.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Table: The created table.
"""
def delete_table(
self,
table: Union[Table, TableReference, str],
not_found_ok: bool = False,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> None:
"""
Delete a table.
Args:
table: Table to delete.
not_found_ok: If True, do not raise error if table not found.
retry: Retry configuration.
timeout: Timeout in seconds.
"""
def get_table(
self,
table: Union[Table, TableReference, str],
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> Table:
"""
Fetch table metadata.
Args:
table: Reference to table to fetch.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Table: The requested table.
"""
def list_tables(
self,
dataset: Union[Dataset, DatasetReference, str],
max_results: Optional[int] = None,
page_token: Optional[str] = None,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> google.api_core.page_iterator.Iterator[Table]:
"""
List tables in a dataset.
Args:
dataset: Dataset to list tables from.
max_results: Maximum tables to return.
page_token: Token for pagination.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Iterator[Table]: Iterator of tables.
"""
def update_table(
self,
table: Table,
fields: List[str],
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: float = None,
) -> Table:
"""
Update table metadata.
Args:
table: Table with updated metadata.
fields: Fields to update.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Table: Updated table.
"""Monitor and control BigQuery jobs including queries, loads, extracts, and copies.
def get_job(
self,
job_id: str,
project: Optional[str] = None,
location: Optional[str] = None,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> Union[QueryJob, LoadJob, ExtractJob, CopyJob, UnknownJob]:
"""
Fetch job metadata.
Args:
job_id: Unique identifier for the job.
project: Project ID where job was created.
location: Location where job was created.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Job: The requested job instance; concrete type depends on the job kind.
"""
def list_jobs(
self,
project: Optional[str] = None,
parent_job: Optional[str] = None,
state_filter: Optional[str] = None,
min_creation_time: Optional[datetime.datetime] = None,
max_creation_time: Optional[datetime.datetime] = None,
max_results: Optional[int] = None,
page_token: Optional[str] = None,
all_users: Optional[bool] = None,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: Optional[float] = None,
) -> google.api_core.page_iterator.Iterator:
"""
List jobs in a project.
Args:
project: Project ID to list jobs from.
parent_job: Parent job ID for script jobs.
state_filter: Filter by job state ('done', 'pending', 'running').
min_creation_time: Minimum job creation time.
max_creation_time: Maximum job creation time.
max_results: Maximum jobs to return.
page_token: Token for pagination.
all_users: Include jobs from all users.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
Iterator: Iterator of job instances.
"""
def cancel_job(
self,
job_id: str,
project: str = None,
location: str = None,
retry: google.api_core.retry.Retry = DEFAULT_RETRY,
timeout: float = None,
) -> bool:
"""
Cancel a job.
Args:
job_id: Unique identifier for the job.
project: Project ID where job was created.
location: Location where job was created.
retry: Retry configuration.
timeout: Timeout in seconds.
Returns:
bool: True if cancellation was successful.
"""Load and extract data to/from BigQuery tables with various configuration options.
def load_table_from_uri(
self,
source_uris: Union[str, List[str]],
destination: Union[Table, TableReference, str],
job_config: Optional[LoadJobConfig] = None,
**kwargs
) -> LoadJob:
"""
Load data from Cloud Storage URIs.
Args:
source_uris: Cloud Storage URIs to load from.
destination: Destination table.
job_config: Configuration for the load job.
**kwargs: Additional keyword arguments forwarded to job creation.
Returns:
LoadJob: Job instance for the load operation.
"""
def load_table_from_file(
self,
file_obj: typing.BinaryIO,
destination: Union[Table, TableReference, str],
rewind: bool = False,
size: Optional[int] = None,
num_retries: int = 6,
job_config: Optional[LoadJobConfig] = None,
**kwargs
) -> LoadJob:
"""
Load data from a file object.
Args:
file_obj: File-like object to load from.
destination: Destination table.
rewind: Whether to rewind file before loading.
size: Number of bytes to load.
num_retries: Number of upload retries.
job_config: Configuration for the load job.
**kwargs: Additional keyword arguments forwarded to job creation.
Returns:
LoadJob: Job instance for the load operation.
"""
def extract_table(
self,
source: Union[Table, TableReference, str],
destination_uris: Union[str, List[str]],
job_config: ExtractJobConfig = None,
**kwargs
) -> ExtractJob:
"""
Extract data from a table to Cloud Storage.
Args:
source: Source table to extract from.
destination_uris: Cloud Storage URIs to extract to.
job_config: Configuration for the extract job.
Returns:
ExtractJob: Job instance for the extract operation.
"""from google.cloud import bigquery
# --- Client construction ---------------------------------------------------
# Use default credentials and project
client = bigquery.Client()
# Specify project explicitly
client = bigquery.Client(project="my-project-id")
# Use service account credentials
from google.oauth2 import service_account
credentials = service_account.Credentials.from_service_account_file(
    "path/to/service-account-key.json"
)
client = bigquery.Client(credentials=credentials, project="my-project-id")

# --- Dataset and table management ------------------------------------------
# Create a dataset
dataset_id = "my_new_dataset"
dataset = bigquery.Dataset(f"{client.project}.{dataset_id}")
dataset.location = "US"
dataset = client.create_dataset(dataset, exists_ok=True)
# List all datasets
datasets = list(client.list_datasets())
for dataset in datasets:
    print(dataset.dataset_id)
# Create a table with schema
table_id = "my_new_table"
schema = [
    bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
]
table = bigquery.Table(f"{client.project}.{dataset_id}.{table_id}", schema=schema)
table = client.create_table(table, exists_ok=True)

# Install with Tessl CLI:
npx tessl i tessl/pypi-google-cloud-bigquery