0
# Metastore Services
1
2
The beta and alpha metastore services manage partition metadata for BigQuery external tables. They support batch operations on Hive-style partitions, including creating, updating, deleting, and listing partition metadata.
3
4
**Note**: These services are available in beta (`v1beta`) and alpha (`v1alpha`) versions. API stability is not guaranteed and breaking changes may occur.
5
6
## Capabilities
7
8
### Metastore Partition Service Client
9
10
Client for managing metastore partitions in BigQuery external tables.
11
12
```python { .api }
13
class MetastorePartitionServiceClient:
14
def __init__(self, **kwargs):
15
"""
16
Initialize Metastore Partition Service Client.
17
18
Parameters:
19
- credentials: Google Cloud credentials
20
- project: Default project ID
21
- client_info: Client library information
22
"""
23
24
def batch_create_metastore_partitions(
25
self,
26
parent: str,
27
requests: List[CreateMetastorePartitionRequest],
28
**kwargs
29
) -> BatchCreateMetastorePartitionsResponse:
30
"""
31
Create multiple metastore partitions in a single batch operation.
32
33
Parameters:
34
- parent: Table path
35
- requests: List of partition creation requests
36
37
Returns:
38
BatchCreateMetastorePartitionsResponse with created partitions
39
"""
40
41
def batch_delete_metastore_partitions(
42
self,
43
parent: str,
44
partition_names: List[str],
45
**kwargs
46
) -> None:
47
"""
48
Delete multiple metastore partitions in a batch operation.
49
50
Parameters:
51
- parent: Table path
52
- partition_names: List of partition names to delete
53
"""
54
55
def batch_update_metastore_partitions(
56
self,
57
parent: str,
58
requests: List[UpdateMetastorePartitionRequest],
59
**kwargs
60
) -> BatchUpdateMetastorePartitionsResponse:
61
"""
62
Update multiple metastore partitions in a batch operation.
63
64
Parameters:
65
- parent: Table path
66
- requests: List of partition update requests
67
68
Returns:
69
BatchUpdateMetastorePartitionsResponse with updated partitions
70
"""
71
72
def create_metastore_partition(
73
self,
74
parent: str,
75
metastore_partition: MetastorePartition,
76
**kwargs
77
) -> MetastorePartition:
78
"""
79
Create a single metastore partition.
80
81
Parameters:
82
- parent: Table path
83
- metastore_partition: Partition definition
84
85
Returns:
86
Created MetastorePartition
87
"""
88
89
def list_metastore_partitions(
90
self,
91
parent: str,
92
filter: str = None,
93
**kwargs
94
) -> List[MetastorePartition]:
95
"""
96
List metastore partitions for a table.
97
98
Parameters:
99
- parent: Table path
100
- filter: Optional filter expression
101
102
Returns:
103
List of MetastorePartition objects
104
"""
105
106
def stream_metastore_partitions(
107
self,
108
parent: str,
109
metastore_partitions: List[MetastorePartition],
110
**kwargs
111
) -> Iterator[StreamMetastorePartitionsResponse]:
112
"""
113
Stream metastore partitions for processing.
114
115
Parameters:
116
- parent: Table path
117
- metastore_partitions: Partitions to stream
118
119
Returns:
120
Iterator of StreamMetastorePartitionsResponse
121
"""
122
123
def update_metastore_partition(
124
self,
125
metastore_partition: MetastorePartition,
126
update_mask: FieldMask = None,
127
**kwargs
128
) -> MetastorePartition:
129
"""
130
Update a single metastore partition.
131
132
Parameters:
133
- metastore_partition: Updated partition definition
134
- update_mask: Fields to update
135
136
Returns:
137
Updated MetastorePartition
138
"""
139
```
140
141
### Metastore Partition Service Async Client
142
143
Async version of `MetastorePartitionServiceClient`. It exposes the same methods, using the async/await pattern.
144
145
```python { .api }
146
class MetastorePartitionServiceAsyncClient:
147
async def batch_create_metastore_partitions(
148
self,
149
parent: str,
150
requests: List[CreateMetastorePartitionRequest],
151
**kwargs
152
) -> BatchCreateMetastorePartitionsResponse: ...
153
154
async def batch_delete_metastore_partitions(
155
self,
156
parent: str,
157
partition_names: List[str],
158
**kwargs
159
) -> None: ...
160
161
async def batch_update_metastore_partitions(
162
self,
163
parent: str,
164
requests: List[UpdateMetastorePartitionRequest],
165
**kwargs
166
) -> BatchUpdateMetastorePartitionsResponse: ...
167
168
async def create_metastore_partition(
169
self,
170
parent: str,
171
metastore_partition: MetastorePartition,
172
**kwargs
173
) -> MetastorePartition: ...
174
175
async def list_metastore_partitions(
176
self,
177
parent: str,
178
filter: str = None,
179
**kwargs
180
) -> List[MetastorePartition]: ...
181
182
async def stream_metastore_partitions(
183
self,
184
parent: str,
185
metastore_partitions: List[MetastorePartition],
186
**kwargs
187
) -> AsyncIterator[StreamMetastorePartitionsResponse]: ...
188
189
async def update_metastore_partition(
190
self,
191
metastore_partition: MetastorePartition,
192
update_mask: FieldMask = None,
193
**kwargs
194
) -> MetastorePartition: ...
195
```
196
197
## Usage Examples
198
199
### Basic Partition Creation
200
201
```python
202
from google.cloud import bigquery_storage_v1beta
203
from google.cloud.bigquery_storage_v1beta import types
204
205
# Create client (beta version)
206
client = bigquery_storage_v1beta.MetastorePartitionServiceClient()
207
208
# Define table path
209
parent = "projects/my-project/datasets/my_dataset/tables/my_external_table"
210
211
# Create partition definition
212
partition = types.MetastorePartition(
213
values=["2023", "01", "15"], # Partition values for year/month/day
214
storage_descriptor=types.StorageDescriptor(
215
location_uri="gs://my-bucket/data/year=2023/month=01/day=15/",
216
input_format="org.apache.hadoop.mapred.TextInputFormat",
217
output_format="org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
218
)
219
)
220
221
# Create single partition
222
created_partition = client.create_metastore_partition(
223
parent=parent,
224
metastore_partition=partition
225
)
226
227
print(f"Created partition: {created_partition.partition_name}")
228
```
229
230
### Batch Partition Operations
231
232
```python
233
from google.cloud import bigquery_storage_v1beta
234
from google.cloud.bigquery_storage_v1beta import types
235
236
client = bigquery_storage_v1beta.MetastorePartitionServiceClient()
237
parent = "projects/my-project/datasets/my_dataset/tables/my_external_table"
238
239
# Create multiple partitions in batch
240
create_requests = []
241
for day in range(1, 8): # Create week of partitions
242
partition = types.MetastorePartition(
243
values=["2023", "01", f"{day:02d}"],
244
storage_descriptor=types.StorageDescriptor(
245
location_uri=f"gs://my-bucket/data/year=2023/month=01/day={day:02d}/",
246
input_format="org.apache.hadoop.mapred.TextInputFormat",
247
output_format="org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
248
)
249
)
250
251
request = types.CreateMetastorePartitionRequest(
252
parent=parent,
253
metastore_partition=partition
254
)
255
create_requests.append(request)
256
257
# Batch create
258
response = client.batch_create_metastore_partitions(
259
parent=parent,
260
requests=create_requests
261
)
262
263
print(f"Created {len(response.partitions)} partitions")
264
265
# List all partitions
266
partitions = client.list_metastore_partitions(parent=parent)
267
for partition in partitions:
268
print(f"Partition: {partition.partition_name}, Location: {partition.storage_descriptor.location_uri}")
269
```
270
271
### Partition Updates and Deletion
272
273
```python
274
from google.cloud import bigquery_storage_v1beta
275
from google.cloud.bigquery_storage_v1beta import types
276
from google.protobuf import field_mask_pb2
277
278
client = bigquery_storage_v1beta.MetastorePartitionServiceClient()
279
parent = "projects/my-project/datasets/my_dataset/tables/my_external_table"
280
281
# Update partition location
282
partition_to_update = types.MetastorePartition(
283
partition_name="projects/my-project/datasets/my_dataset/tables/my_external_table/partitions/year=2023/month=01/day=01",
284
storage_descriptor=types.StorageDescriptor(
285
location_uri="gs://my-new-bucket/data/year=2023/month=01/day=01/"
286
)
287
)
288
289
update_mask = field_mask_pb2.FieldMask()
290
update_mask.paths.append("storage_descriptor.location_uri")
291
292
updated_partition = client.update_metastore_partition(
293
metastore_partition=partition_to_update,
294
update_mask=update_mask
295
)
296
297
# Delete specific partitions
298
partition_names_to_delete = [
299
"projects/my-project/datasets/my_dataset/tables/my_external_table/partitions/year=2023/month=01/day=06",
300
"projects/my-project/datasets/my_dataset/tables/my_external_table/partitions/year=2023/month=01/day=07"
301
]
302
303
client.batch_delete_metastore_partitions(
304
parent=parent,
305
partition_names=partition_names_to_delete
306
)
307
308
print("Deleted specified partitions")
309
```
310
311
### Using Alpha Version
312
313
```python
314
# The alpha version exposes the same core API but may include newer, experimental features
315
from google.cloud import bigquery_storage_v1alpha
316
from google.cloud.bigquery_storage_v1alpha import types
317
318
# Same usage pattern as beta
319
client = bigquery_storage_v1alpha.MetastorePartitionServiceClient()
320
# ... same operations as beta version
321
```
322
323
## Types
324
325
### Core Metastore Types
326
327
```python { .api }
328
class MetastorePartition:
329
"""Metastore partition definition."""
330
values: List[str] # Partition column values
331
create_time: Timestamp # Creation timestamp
332
update_time: Timestamp # Last update timestamp
333
storage_descriptor: StorageDescriptor # Storage information
334
partition_name: str # Full partition name
335
336
class MetastorePartitionList:
337
"""List of metastore partitions."""
338
partitions: List[MetastorePartition] # Partition list
339
next_page_token: str # Pagination token
340
341
class MetastorePartitionValues:
342
"""Partition column values."""
343
values: List[str] # Column values
344
```
345
346
### Storage and Schema Types
347
348
```python { .api }
349
class StorageDescriptor:
350
"""Storage descriptor for partition data."""
351
location_uri: str # Data location URI
352
input_format: str # Input format class
353
output_format: str # Output format class
354
serde_info: SerDeInfo # Serialization info
355
356
class SerDeInfo:
357
"""Serialization/deserialization information."""
358
serialization_library: str # SerDe library class
359
parameters: Dict[str, str] # SerDe parameters
360
361
class FieldSchema:
362
"""Field schema definition."""
363
name: str # Field name
364
type_: str # Field type
365
description: str # Field description
366
```
367
368
### Request/Response Types
369
370
```python { .api }
371
class CreateMetastorePartitionRequest:
372
"""Request to create a metastore partition."""
373
parent: str # Table path
374
metastore_partition: MetastorePartition # Partition to create
375
376
class UpdateMetastorePartitionRequest:
377
"""Request to update a metastore partition."""
378
metastore_partition: MetastorePartition # Updated partition
379
update_mask: FieldMask # Fields to update
380
381
class BatchCreateMetastorePartitionsRequest:
382
"""Request to create multiple partitions."""
383
parent: str # Table path
384
requests: List[CreateMetastorePartitionRequest] # Creation requests
385
386
class BatchCreateMetastorePartitionsResponse:
387
"""Response with created partitions."""
388
partitions: List[MetastorePartition] # Created partitions
389
390
class BatchUpdateMetastorePartitionsRequest:
391
"""Request to update multiple partitions."""
392
parent: str # Table path
393
requests: List[UpdateMetastorePartitionRequest] # Update requests
394
395
class BatchUpdateMetastorePartitionsResponse:
396
"""Response with updated partitions."""
397
partitions: List[MetastorePartition] # Updated partitions
398
399
class BatchDeleteMetastorePartitionsRequest:
400
"""Request to delete multiple partitions."""
401
parent: str # Table path
402
partition_names: List[str] # Names of partitions to delete
403
404
class ListMetastorePartitionsRequest:
405
"""Request to list metastore partitions."""
406
parent: str # Table path
407
page_size: int # Page size for pagination
408
page_token: str # Page token for pagination
409
filter: str # Filter expression
410
411
class ListMetastorePartitionsResponse:
412
"""Response with partition list."""
413
partitions: List[MetastorePartition] # Partition list
414
next_page_token: str # Next page token
415
416
class StreamMetastorePartitionsRequest:
417
"""Request to stream metastore partitions."""
418
parent: str # Table path
419
metastore_partitions: List[MetastorePartition] # Partitions to stream
420
421
class StreamMetastorePartitionsResponse:
422
"""Response from streaming partitions."""
423
total_partitions_streamed: int # Number of partitions streamed
424
```
425
426
### Error Types
427
428
```python { .api }
429
class BatchSizeTooLargeError:
430
"""Error when batch operation size exceeds limits."""
431
message: str # Error message
432
max_batch_size: int # Maximum allowed batch size
433
```
434
435
## Import Patterns
436
437
### Beta Version Access
438
439
```python
440
# Full module import
441
from google.cloud import bigquery_storage_v1beta
442
443
# Specific imports
444
from google.cloud.bigquery_storage_v1beta import MetastorePartitionServiceClient
445
from google.cloud.bigquery_storage_v1beta import types
446
447
# Client creation
448
client = bigquery_storage_v1beta.MetastorePartitionServiceClient()
449
```
450
451
### Alpha Version Access
452
453
```python
454
# Full module import
455
from google.cloud import bigquery_storage_v1alpha
456
457
# Specific imports
458
from google.cloud.bigquery_storage_v1alpha import MetastorePartitionServiceClient
459
from google.cloud.bigquery_storage_v1alpha import types
460
461
# Client creation
462
client = bigquery_storage_v1alpha.MetastorePartitionServiceClient()
463
```
464
465
## Version Notes
466
467
- **v1beta**: Beta API with comprehensive metastore partition management; more mature than alpha, but stability is still not guaranteed
468
- **v1alpha**: Alpha API with potential experimental features and breaking changes
469
- Both versions provide identical core functionality for partition operations
470
- Prefer beta when you need the more stable of the two surfaces, and use alpha only for testing new features; neither version carries a stability guarantee
471
- The API may be promoted to a stable `v1` release in the future