Google Cloud BigQuery Storage API client library for high-performance streaming read/write access to BigQuery data
```bash
npx @tessl/cli install tessl/pypi-google-cloud-bigquery-storage@2.33.0
```

A high-performance Python client library for the Google BigQuery Storage API that enables efficient streaming of large datasets from BigQuery tables. The library provides streaming read capabilities with support for multiple data formats (Avro, Arrow, Protocol Buffers), streaming write operations with transactional semantics, and integration with popular data analysis frameworks like pandas and pyarrow.
Install the base package and optional extras as needed:

```bash
pip install google-cloud-bigquery-storage
pip install google-cloud-bigquery-storage[fastavro]  # for Avro format support
pip install google-cloud-bigquery-storage[pyarrow]   # for Arrow format support
pip install google-cloud-bigquery-storage[pandas]    # for pandas DataFrame support
```

Import the top-level package:

```python
from google.cloud import bigquery_storage
```

Import specific clients and types:

```python
from google.cloud.bigquery_storage import BigQueryReadClient, BigQueryWriteClient, BigQueryWriteAsyncClient
from google.cloud.bigquery_storage import types
from google.cloud.bigquery_storage import ReadRowsStream, AppendRowsStream
# Access package version
import google.cloud.bigquery_storage
print(google.cloud.bigquery_storage.__version__)
```

Access beta/alpha and v1 APIs:

```python
# Explicit v1 API access
from google.cloud import bigquery_storage_v1
# Beta version for metastore services
from google.cloud import bigquery_storage_v1beta
from google.cloud import bigquery_storage_v1beta2
# Alpha version for experimental features
from google.cloud import bigquery_storage_v1alpha
```

Stream rows from a table with `BigQueryReadClient`:

```python
from google.cloud.bigquery_storage import BigQueryReadClient, types
# Create client
client = BigQueryReadClient()
# Configure read session
table = "projects/your-project/datasets/your_dataset/tables/your_table"
requested_session = types.ReadSession(
    table=table,
    data_format=types.DataFormat.AVRO,
)
# Create read session
session = client.create_read_session(
    parent="projects/your-project",
    read_session=requested_session,
    max_stream_count=1,
)
# Read data
reader = client.read_rows(session.streams[0].name)
for row in reader.rows(session):
    print(row)
```

Write rows to a table with `BigQueryWriteClient`:

```python
from google.cloud.bigquery_storage import BigQueryWriteClient, types
# Create client
client = BigQueryWriteClient()
# Create write stream
parent = client.table_path("your-project", "your_dataset", "your_table")
write_stream = types.WriteStream(type_=types.WriteStream.Type.PENDING)
stream = client.create_write_stream(parent=parent, write_stream=write_stream)
# Append data (requires protocol buffer serialized data)
request = types.AppendRowsRequest(write_stream=stream.name)
# ... configure with serialized row data
responses = client.append_rows(iter([request]))  # returns an iterator of AppendRowsResponse
```

The BigQuery Storage API uses a streaming architecture designed for high-performance data transfer:
- Wrapper classes (ReadRowsStream, AppendRowsStream) for easier stream management

This design enables the capabilities described below.
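As an illustration of the parallel-read design, here is a hedged sketch that requests multiple read streams and consumes them concurrently. The project and table paths are placeholders, the service may return fewer streams than requested, and the `fastavro` extra is assumed to be installed for Avro decoding.

```python
from concurrent.futures import ThreadPoolExecutor

from google.cloud.bigquery_storage import BigQueryReadClient, types

client = BigQueryReadClient()
# Placeholder resource names.
table = "projects/your-project/datasets/your_dataset/tables/your_table"

session = client.create_read_session(
    parent="projects/your-project",
    read_session=types.ReadSession(table=table, data_format=types.DataFormat.AVRO),
    max_stream_count=4,  # ask for up to 4 independently readable streams
)

def count_rows(stream_name: str) -> int:
    # Each stream is an independent cursor over a disjoint slice of the table.
    reader = client.read_rows(stream_name)
    return sum(1 for _ in reader.rows(session))

with ThreadPoolExecutor(max_workers=max(len(session.streams), 1)) as pool:
    total = sum(pool.map(count_rows, [stream.name for stream in session.streams]))
print(total)
```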
High-performance streaming reads from BigQuery tables with support for parallel processing, column selection, row filtering, and multiple data formats. Includes conversion utilities for pandas and Arrow. Available in both synchronous and asynchronous versions.
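As a sketch of column selection, row filtering, and pandas conversion: a hedged example in which the table path, column names, and filter expression are placeholders, and the `pandas` and `pyarrow` extras are assumed to be installed.

```python
from google.cloud.bigquery_storage import BigQueryReadClient, types

client = BigQueryReadClient()
# Placeholder resource names.
table = "projects/your-project/datasets/your_dataset/tables/your_table"

read_options = types.ReadSession.TableReadOptions(
    selected_fields=["name", "state"],   # read only these columns
    row_restriction='state = "WA"',      # filter rows on the server side
)
session = client.create_read_session(
    parent="projects/your-project",
    read_session=types.ReadSession(
        table=table,
        data_format=types.DataFormat.ARROW,
        read_options=read_options,
    ),
    max_stream_count=1,
)

reader = client.read_rows(session.streams[0].name)
df = reader.to_dataframe(session)   # pandas DataFrame; to_arrow(session) gives a pyarrow Table
```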
```python
class BigQueryReadClient:
    def create_read_session(
        self,
        parent: str,
        read_session: ReadSession,
        max_stream_count: int = None,
    ) -> ReadSession: ...

    def read_rows(self, name: str, offset: int = 0) -> ReadRowsStream: ...

    def split_read_stream(
        self,
        name: str,
        fraction: float = None,
    ) -> SplitReadStreamResponse: ...


class BigQueryReadAsyncClient:
    async def create_read_session(
        self,
        parent: str,
        read_session: ReadSession,
        max_stream_count: int = None,
    ) -> ReadSession: ...

    def read_rows(self, name: str, offset: int = 0) -> ReadRowsStream: ...

    async def split_read_stream(
        self,
        name: str,
        fraction: float = None,
    ) -> SplitReadStreamResponse: ...
```

Streaming write operations with support for multiple write stream types, transactional semantics, and batch commit operations. Supports Protocol Buffer, Avro, and Arrow data formats.
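A hedged sketch of the transactional (PENDING) write lifecycle; the project, dataset, and table names are placeholders, and row serialization via `append_rows` is omitted.

```python
from google.cloud.bigquery_storage import BigQueryWriteClient, types

client = BigQueryWriteClient()
# Placeholder resource names.
parent = client.table_path("your-project", "your_dataset", "your_table")

# A PENDING stream buffers appended rows until it is finalized and committed.
stream = client.create_write_stream(
    parent=parent,
    write_stream=types.WriteStream(type_=types.WriteStream.Type.PENDING),
)

# ... append protocol-buffer-serialized rows to stream.name via append_rows ...

# Finalize the stream (no further appends), then commit it atomically so the
# buffered rows become visible in the table.
client.finalize_write_stream(name=stream.name)
commit_response = client.batch_commit_write_streams(
    parent=parent,
    write_streams=[stream.name],
)
print(commit_response.commit_time)
```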
```python
class BigQueryWriteClient:
    def create_write_stream(
        self,
        parent: str,
        write_stream: WriteStream,
    ) -> WriteStream: ...

    def append_rows(
        self,
        requests: Iterator[AppendRowsRequest],
    ) -> Iterator[AppendRowsResponse]: ...

    def finalize_write_stream(self, name: str) -> FinalizeWriteStreamResponse: ...

    def batch_commit_write_streams(
        self,
        parent: str,
        write_streams: List[str],
    ) -> BatchCommitWriteStreamsResponse: ...
```

Comprehensive type system for BigQuery Storage operations including session configuration, stream management, data formats, error handling, and schema definitions.
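For example, the enum and message types can be combined when configuring sessions and streams; a small hedged sketch (the comments summarize the visibility semantics of each write stream type):

```python
from google.cloud.bigquery_storage import types

# Choose a wire format for read sessions.
arrow_format = types.DataFormat.ARROW

# Write stream types differ in visibility semantics:
#   COMMITTED - appended rows are visible in the table immediately
#   PENDING   - rows become visible only after finalize + batch commit
#   BUFFERED  - rows become visible up to an explicitly flushed offset
committed_stream = types.WriteStream(type_=types.WriteStream.Type.COMMITTED)
pending_stream = types.WriteStream(type_=types.WriteStream.Type.PENDING)
```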
```python
class DataFormat(enum.Enum):
    AVRO = 1
    ARROW = 2
    PROTO = 3


class ReadSession:
    table: str
    data_format: DataFormat
    read_options: TableReadOptions
    streams: List[ReadStream]


class WriteStream:
    name: str
    type_: WriteStream.Type
    create_time: Timestamp
    state: WriteStream.State
```

Beta and alpha services for managing BigQuery external table metastore partitions. Supports batch operations for creating, updating, deleting, and listing Hive-style partitions in external tables; a hedged usage sketch follows the signatures below.
```python
class MetastorePartitionServiceClient:
    def batch_create_metastore_partitions(
        self,
        parent: str,
        requests: List[CreateMetastorePartitionRequest],
    ) -> BatchCreateMetastorePartitionsResponse: ...

    def batch_delete_metastore_partitions(
        self,
        parent: str,
        partition_names: List[str],
    ) -> None: ...

    def list_metastore_partitions(
        self,
        parent: str,
        filter: str = None,
    ) -> List[MetastorePartition]: ...
```
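A hedged sketch of batch partition management with the v1alpha client: the table resource name and partition values are placeholders, and the published client accepts a single `request` message per call, so the exact request shapes may differ from the simplified signatures above.

```python
from google.cloud import bigquery_storage_v1alpha
from google.cloud.bigquery_storage_v1alpha import types

client = bigquery_storage_v1alpha.MetastorePartitionServiceClient()
# Placeholder external table resource name.
parent = "projects/your-project/datasets/your_dataset/tables/your_table"

# Batch-create Hive-style partitions (one CreateMetastorePartitionRequest per partition).
create_response = client.batch_create_metastore_partitions(
    request=types.BatchCreateMetastorePartitionsRequest(
        parent=parent,
        requests=[
            types.CreateMetastorePartitionRequest(
                parent=parent,
                metastore_partition=types.MetastorePartition(values=["2024", "01"]),
            )
        ],
    )
)

# List existing partitions, optionally with a filter expression.
list_response = client.list_metastore_partitions(
    request=types.ListMetastorePartitionsRequest(parent=parent)
)
```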