Google Cloud BigQuery Storage API client library for high-performance streaming read/write access to BigQuery data
—
Beta and alpha services for managing BigQuery external table metastore partitions. These services enable batch operations on Hive-style partitions in external tables, supporting creation, updating, deletion, and listing of partition metadata.
Note: These services are available in beta (v1beta) and alpha (v1alpha) versions. API stability is not guaranteed and breaking changes may occur.
Client for managing metastore partitions in BigQuery external tables.
class MetastorePartitionServiceClient:
    """Synchronous client for managing metastore partitions in BigQuery external tables.

    Only the public signatures are listed here; method bodies are omitted.
    """

    # NOTE(review): confirm this method list against the published API
    # reference for the installed library version before relying on it.

    def __init__(self, **kwargs):
        """Initialize the Metastore Partition Service client.

        Args:
            **kwargs: Optional client configuration. Documented keys:

                - credentials: Google Cloud credentials
                - project: Default project ID
                - client_info: Client library information
        """

    def batch_create_metastore_partitions(
        self,
        parent: str,
        requests: List[CreateMetastorePartitionRequest],
        **kwargs
    ) -> BatchCreateMetastorePartitionsResponse:
        """Create multiple metastore partitions in a single batch operation.

        Args:
            parent: Table path.
            requests: List of partition creation requests.

        Returns:
            BatchCreateMetastorePartitionsResponse with the created partitions.
        """

    def batch_delete_metastore_partitions(
        self,
        parent: str,
        partition_names: List[str],
        **kwargs
    ) -> None:
        """Delete multiple metastore partitions in a batch operation.

        Args:
            parent: Table path.
            partition_names: List of partition names to delete.
        """

    def batch_update_metastore_partitions(
        self,
        parent: str,
        requests: List[UpdateMetastorePartitionRequest],
        **kwargs
    ) -> BatchUpdateMetastorePartitionsResponse:
        """Update multiple metastore partitions in a batch operation.

        Args:
            parent: Table path.
            requests: List of partition update requests.

        Returns:
            BatchUpdateMetastorePartitionsResponse with the updated partitions.
        """

    def create_metastore_partition(
        self,
        parent: str,
        metastore_partition: MetastorePartition,
        **kwargs
    ) -> MetastorePartition:
        """Create a single metastore partition.

        Args:
            parent: Table path.
            metastore_partition: Partition definition.

        Returns:
            The created MetastorePartition.
        """

    def list_metastore_partitions(
        self,
        parent: str,
        filter: str = None,
        **kwargs
    ) -> List[MetastorePartition]:
        """List metastore partitions for a table.

        Args:
            parent: Table path.
            filter: Optional filter expression.

        Returns:
            List of MetastorePartition objects.
        """

    def stream_metastore_partitions(
        self,
        parent: str,
        metastore_partitions: List[MetastorePartition],
        **kwargs
    ) -> Iterator[StreamMetastorePartitionsResponse]:
        """Stream metastore partitions for processing.

        Args:
            parent: Table path.
            metastore_partitions: Partitions to stream.

        Returns:
            Iterator of StreamMetastorePartitionsResponse.
        """

    def update_metastore_partition(
        self,
        metastore_partition: MetastorePartition,
        update_mask: FieldMask = None,
        **kwargs
    ) -> MetastorePartition:
        """Update a single metastore partition.

        Args:
            metastore_partition: Updated partition definition.
            update_mask: Fields to update.

        Returns:
            The updated MetastorePartition.
        """


# Async version of MetastorePartitionServiceClient with same methods using async/await pattern.
class MetastorePartitionServiceAsyncClient:
    """Asynchronous client for metastore partitions; every method is a coroutine.

    Only the public signatures are listed here; method bodies are omitted.
    """

    async def batch_create_metastore_partitions(
        self,
        parent: str,
        requests: List[CreateMetastorePartitionRequest],
        **kwargs
    ) -> BatchCreateMetastorePartitionsResponse:
        """Create multiple metastore partitions in a single batch operation."""
        ...

    async def batch_delete_metastore_partitions(
        self,
        parent: str,
        partition_names: List[str],
        **kwargs
    ) -> None:
        """Delete multiple metastore partitions in a batch operation."""
        ...

    async def batch_update_metastore_partitions(
        self,
        parent: str,
        requests: List[UpdateMetastorePartitionRequest],
        **kwargs
    ) -> BatchUpdateMetastorePartitionsResponse:
        """Update multiple metastore partitions in a batch operation."""
        ...

    async def create_metastore_partition(
        self,
        parent: str,
        metastore_partition: MetastorePartition,
        **kwargs
    ) -> MetastorePartition:
        """Create a single metastore partition."""
        ...

    async def list_metastore_partitions(
        self,
        parent: str,
        filter: str = None,
        **kwargs
    ) -> List[MetastorePartition]:
        """List metastore partitions for a table, optionally filtered."""
        ...

    async def stream_metastore_partitions(
        self,
        parent: str,
        metastore_partitions: List[MetastorePartition],
        **kwargs
    ) -> AsyncIterator[StreamMetastorePartitionsResponse]:
        """Stream metastore partitions for processing."""
        ...

    async def update_metastore_partition(
        self,
        metastore_partition: MetastorePartition,
        update_mask: FieldMask = None,
        **kwargs
    ) -> MetastorePartition:
        """Update a single metastore partition."""
        ...


from google.cloud import bigquery_storage_v1beta
from google.cloud.bigquery_storage_v1beta import types

# Create client (beta version)
client = bigquery_storage_v1beta.MetastorePartitionServiceClient()

# Define table path
parent = "projects/my-project/datasets/my_dataset/tables/my_external_table"

# Create partition definition
partition = types.MetastorePartition(
    values=["2023", "01", "15"],  # Partition values for year/month/day
    storage_descriptor=types.StorageDescriptor(
        location_uri="gs://my-bucket/data/year=2023/month=01/day=15/",
        input_format="org.apache.hadoop.mapred.TextInputFormat",
        output_format="org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
    ),
)

# Create single partition
created_partition = client.create_metastore_partition(
    parent=parent,
    metastore_partition=partition,
)
print(f"Created partition: {created_partition.partition_name}")

from google.cloud import bigquery_storage_v1beta
from google.cloud.bigquery_storage_v1beta import types

client = bigquery_storage_v1beta.MetastorePartitionServiceClient()
parent = "projects/my-project/datasets/my_dataset/tables/my_external_table"

# Create multiple partitions in batch
create_requests = []
for day in range(1, 8):  # Create week of partitions
    # One partition per day; the day is zero-padded in both the partition
    # values and the storage location.
    partition = types.MetastorePartition(
        values=["2023", "01", f"{day:02d}"],
        storage_descriptor=types.StorageDescriptor(
            location_uri=f"gs://my-bucket/data/year=2023/month=01/day={day:02d}/",
            input_format="org.apache.hadoop.mapred.TextInputFormat",
            output_format="org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
        ),
    )
    request = types.CreateMetastorePartitionRequest(
        parent=parent,
        metastore_partition=partition,
    )
    create_requests.append(request)

# Batch create
response = client.batch_create_metastore_partitions(
    parent=parent,
    requests=create_requests,
)
print(f"Created {len(response.partitions)} partitions")

# List all partitions
partitions = client.list_metastore_partitions(parent=parent)
for partition in partitions:
    print(f"Partition: {partition.partition_name}, Location: {partition.storage_descriptor.location_uri}")

from google.cloud import bigquery_storage_v1beta
from google.cloud.bigquery_storage_v1beta import types
from google.protobuf import field_mask_pb2

client = bigquery_storage_v1beta.MetastorePartitionServiceClient()
parent = "projects/my-project/datasets/my_dataset/tables/my_external_table"

# Update partition location: only the storage location is supplied, and the
# field mask names that single path as the field to update.
partition_to_update = types.MetastorePartition(
    partition_name="projects/my-project/datasets/my_dataset/tables/my_external_table/partitions/year=2023/month=01/day=01",
    storage_descriptor=types.StorageDescriptor(
        location_uri="gs://my-new-bucket/data/year=2023/month=01/day=01/",
    ),
)
update_mask = field_mask_pb2.FieldMask(paths=["storage_descriptor.location_uri"])
updated_partition = client.update_metastore_partition(
    metastore_partition=partition_to_update,
    update_mask=update_mask,
)

# Delete specific partitions
partition_names_to_delete = [
    "projects/my-project/datasets/my_dataset/tables/my_external_table/partitions/year=2023/month=01/day=06",
    "projects/my-project/datasets/my_dataset/tables/my_external_table/partitions/year=2023/month=01/day=07",
]
client.batch_delete_metastore_partitions(
    parent=parent,
    partition_names=partition_names_to_delete,
)
print("Deleted specified partitions")

# Alpha version has identical API but may have newer features
from google.cloud import bigquery_storage_v1alpha
from google.cloud.bigquery_storage_v1alpha import types
# Same usage pattern as beta: the client is constructed from the v1alpha package
client = bigquery_storage_v1alpha.MetastorePartitionServiceClient()
# ... same operations as beta version


class MetastorePartition:
    """Metastore partition definition."""

    values: List[str]  # Partition column values
    create_time: Timestamp  # Creation timestamp
    update_time: Timestamp  # Last update timestamp
    storage_descriptor: StorageDescriptor  # Storage information
    partition_name: str  # Full partition name
class MetastorePartitionList:
    """List of metastore partitions."""

    partitions: List[MetastorePartition]  # Partition list
    next_page_token: str  # Pagination token
class MetastorePartitionValues:
    """Partition column values."""

    values: List[str]  # Column values


class StorageDescriptor:
    """Storage descriptor for partition data."""

    location_uri: str  # Data location URI
    input_format: str  # Input format class
    output_format: str  # Output format class
    serde_info: SerDeInfo  # Serialization info
class SerDeInfo:
    """Serialization/deserialization information."""

    serialization_library: str  # SerDe library class
    parameters: Dict[str, str]  # SerDe parameters
class FieldSchema:
    """Field schema definition."""

    name: str  # Field name
    type_: str  # Field type
    description: str  # Field description


class CreateMetastorePartitionRequest:
    """Request to create a metastore partition."""

    parent: str  # Table path
    metastore_partition: MetastorePartition  # Partition to create
class UpdateMetastorePartitionRequest:
    """Request to update a metastore partition."""

    metastore_partition: MetastorePartition  # Updated partition
    update_mask: FieldMask  # Fields to update
class BatchCreateMetastorePartitionsRequest:
    """Request to create multiple partitions."""

    parent: str  # Table path
    requests: List[CreateMetastorePartitionRequest]  # Creation requests
class BatchCreateMetastorePartitionsResponse:
    """Response with created partitions."""

    partitions: List[MetastorePartition]  # Created partitions
class BatchUpdateMetastorePartitionsRequest:
    """Request to update multiple partitions."""

    parent: str  # Table path
    requests: List[UpdateMetastorePartitionRequest]  # Update requests
class BatchUpdateMetastorePartitionsResponse:
    """Response with updated partitions."""

    partitions: List[MetastorePartition]  # Updated partitions
class BatchDeleteMetastorePartitionsRequest:
    """Request to delete multiple partitions."""

    parent: str  # Table path
    partition_names: List[str]  # Names of partitions to delete
class ListMetastorePartitionsRequest:
    """Request to list metastore partitions."""

    parent: str  # Table path
    page_size: int  # Page size for pagination
    page_token: str  # Page token for pagination
    filter: str  # Filter expression
class ListMetastorePartitionsResponse:
    """Response with partition list."""

    partitions: List[MetastorePartition]  # Partition list
    next_page_token: str  # Next page token
class StreamMetastorePartitionsRequest:
    """Request to stream metastore partitions."""

    parent: str  # Table path
    metastore_partitions: List[MetastorePartition]  # Partitions to stream
class StreamMetastorePartitionsResponse:
    """Response from streaming partitions."""

    total_partitions_streamed: int  # Number of partitions streamed


class BatchSizeTooLargeError:
    """Error when batch operation size exceeds limits."""

    message: str  # Error message
    max_batch_size: int  # Maximum allowed batch size


# Full module import
from google.cloud import bigquery_storage_v1beta
# Specific imports: the client class and the message types module
from google.cloud.bigquery_storage_v1beta import MetastorePartitionServiceClient
from google.cloud.bigquery_storage_v1beta import types
# Client creation (via the module-qualified name)
client = bigquery_storage_v1beta.MetastorePartitionServiceClient()

# Full module import
from google.cloud import bigquery_storage_v1alpha
# Specific imports: the client class and the message types module
from google.cloud.bigquery_storage_v1alpha import MetastorePartitionServiceClient
from google.cloud.bigquery_storage_v1alpha import types
# Client creation (via the module-qualified name)
client = bigquery_storage_v1alpha.MetastorePartitionServiceClient()

Install with Tessl CLI:

npx tessl i tessl/pypi-google-cloud-bigquery-storage