# Metadata Import and Export

Import metadata from external sources and export metastore data to Google Cloud Storage. Supports MySQL and PostgreSQL database dumps, with validation, error handling, and progress tracking for large-scale data migration scenarios.

## Capabilities

### List Metadata Imports

Retrieve all metadata import operations for a metastore service, with filtering and pagination support.

```python { .api }
def list_metadata_imports(
    self,
    request: Optional[ListMetadataImportsRequest] = None,
    *,
    parent: Optional[str] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, str]] = ()
) -> pagers.ListMetadataImportsPager:
    """
    Lists imports in a service.

    Args:
        request: The request object containing list parameters
        parent: Required. The relative resource name of the service.
            Format: projects/{project_id}/locations/{location_id}/services/{service_id}
        retry: Retry configuration for the request
        timeout: Request timeout in seconds
        metadata: Additional metadata for the request

    Returns:
        ListMetadataImportsPager: Pageable list of metadata imports

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request fails
    """
```

Usage example:

```python
from google.cloud import metastore

client = metastore.DataprocMetastoreClient()
parent = "projects/my-project/locations/us-central1/services/my-metastore"

# List all imports
for metadata_import in client.list_metadata_imports(parent=parent):
    print(f"Import: {metadata_import.name}")
    print(f"State: {metadata_import.state.name}")
    print(f"Database dump: {metadata_import.database_dump.gcs_uri}")

# Filter by state and order by creation time
request = metastore.ListMetadataImportsRequest(
    parent=parent,
    filter="state=SUCCEEDED",
    order_by="create_time desc"
)
for metadata_import in client.list_metadata_imports(request=request):
    print(f"Succeeded import: {metadata_import.name}")
```

### Get Metadata Import

Retrieve detailed information about a specific metadata import operation, including progress and error details.

```python { .api }
def get_metadata_import(
    self,
    request: Optional[GetMetadataImportRequest] = None,
    *,
    name: Optional[str] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, str]] = ()
) -> MetadataImport:
    """
    Gets details of a single import.

    Args:
        request: The request object
        name: Required. The relative resource name of the metadata import.
            Format: projects/{project_id}/locations/{location_id}/services/{service_id}/metadataImports/{import_id}
        retry: Retry configuration
        timeout: Request timeout in seconds
        metadata: Additional metadata

    Returns:
        MetadataImport: The metadata import resource

    Raises:
        google.api_core.exceptions.NotFound: If the import doesn't exist
    """
```

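A minimal usage sketch (the import ID shown is hypothetical):

```python
from google.cloud import metastore

client = metastore.DataprocMetastoreClient()

# "mysql-import-20240115" is a hypothetical import ID
name = (
    "projects/my-project/locations/us-central1/services/my-metastore"
    "/metadataImports/mysql-import-20240115"
)

metadata_import = client.get_metadata_import(name=name)
print(f"State: {metadata_import.state.name}")
print(f"Created: {metadata_import.create_time}")
```
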
### Create Metadata Import

Import metadata from external database dumps stored in Google Cloud Storage.

```python { .api }
def create_metadata_import(
    self,
    request: Optional[CreateMetadataImportRequest] = None,
    *,
    parent: Optional[str] = None,
    metadata_import: Optional[MetadataImport] = None,
    metadata_import_id: Optional[str] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, str]] = ()
) -> operation.Operation:
    """
    Creates a new MetadataImport in a given project and location.

    Args:
        request: The request object
        parent: Required. The relative resource name of the service
        metadata_import: Required. The metadata import configuration
        metadata_import_id: Required. The ID to use for the import
        retry: Retry configuration
        timeout: Request timeout in seconds
        metadata: Additional metadata

    Returns:
        Operation: Long-running operation for metadata import

    Raises:
        google.api_core.exceptions.AlreadyExists: If import_id already exists
        google.api_core.exceptions.InvalidArgument: If configuration is invalid
    """
```

Usage example:

```python
from google.cloud import metastore

client = metastore.DataprocMetastoreClient()

# Import from MySQL dump
import_config = metastore.MetadataImport(
    description="Import production MySQL metastore data",
    database_dump=metastore.MetadataImport.DatabaseDump(
        gcs_uri="gs://my-bucket/metastore-dumps/prod-metastore-20240115.sql",
        database_type=metastore.MetadataImport.DatabaseDump.DatabaseType.MYSQL
    )
)

operation = client.create_metadata_import(
    parent="projects/my-project/locations/us-central1/services/my-metastore",
    metadata_import_id="mysql-import-20240115",
    metadata_import=import_config
)

# Monitor import progress
print("Starting metadata import...")
result = operation.result(timeout=7200)  # Can take up to 2 hours for large dumps
print(f"Import completed: {result.name}")
```

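Since the call can raise `AlreadyExists` or `InvalidArgument` (see the Raises section above), callers typically guard it; a sketch, reusing `import_config` from the example above:

```python
from google.api_core import exceptions

try:
    operation = client.create_metadata_import(
        parent="projects/my-project/locations/us-central1/services/my-metastore",
        metadata_import_id="mysql-import-20240115",
        metadata_import=import_config,
    )
    result = operation.result(timeout=7200)
except exceptions.AlreadyExists:
    # The import ID was already used; import IDs must be unique per service
    print("Import ID already exists; choose a different metadata_import_id")
except exceptions.InvalidArgument as e:
    # e.g. a malformed gcs_uri or unsupported database_type
    print(f"Invalid import configuration: {e}")
```
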
### Update Metadata Import

Update metadata import configuration such as description and labels.

```python { .api }
def update_metadata_import(
    self,
    request: Optional[UpdateMetadataImportRequest] = None,
    *,
    metadata_import: Optional[MetadataImport] = None,
    update_mask: Optional[field_mask_pb2.FieldMask] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, str]] = ()
) -> operation.Operation:
    """
    Updates a single import.

    Args:
        request: The request object
        metadata_import: Required. The import to update
        update_mask: Required. Field mask specifying which fields to update
        retry: Retry configuration
        timeout: Request timeout in seconds
        metadata: Additional metadata

    Returns:
        Operation: Long-running operation for import update

    Raises:
        google.api_core.exceptions.NotFound: If the import doesn't exist
    """
```

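A usage sketch, assuming only the description is being changed (the import name shown is hypothetical; the `update_mask` paths follow standard FieldMask semantics):

```python
from google.cloud import metastore
from google.protobuf import field_mask_pb2

client = metastore.DataprocMetastoreClient()

metadata_import = metastore.MetadataImport(
    name=(
        "projects/my-project/locations/us-central1/services/my-metastore"
        "/metadataImports/mysql-import-20240115"
    ),
    description="Updated description for the production import",
)

operation = client.update_metadata_import(
    metadata_import=metadata_import,
    update_mask=field_mask_pb2.FieldMask(paths=["description"]),
)
result = operation.result(timeout=300)
print(f"Updated import: {result.name}")
```
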
### Export Metadata

Export metastore metadata to Google Cloud Storage in various formats.

```python { .api }
def export_metadata(
    self,
    request: Optional[ExportMetadataRequest] = None,
    *,
    service: Optional[str] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, str]] = ()
) -> operation.Operation:
    """
    Exports metadata from a service.

    Args:
        request: The request object
        service: Required. The relative resource name of the service
        retry: Retry configuration
        timeout: Request timeout in seconds
        metadata: Additional metadata

    Returns:
        Operation: Long-running operation for metadata export

    Raises:
        google.api_core.exceptions.NotFound: If the service doesn't exist
        google.api_core.exceptions.FailedPrecondition: If export cannot be performed
    """
```

Usage example:

```python
from google.cloud import metastore

client = metastore.DataprocMetastoreClient()

# Export to Cloud Storage
export_request = metastore.ExportMetadataRequest(
    service="projects/my-project/locations/us-central1/services/my-metastore",
    destination_gcs_uri="gs://my-exports/metastore-export-20240115/",
    database_dump_type=metastore.DatabaseDumpSpec.Type.MYSQL
)

operation = client.export_metadata(request=export_request)

# Wait for export completion
print("Starting metadata export...")
metadata_export = operation.result(timeout=3600)
print(f"Export completed to: {metadata_export.destination_gcs_uri}")
```

## Core Types

### Metadata Import Resource

```python { .api }
class MetadataImport:
    name: str
    description: str
    create_time: timestamp_pb2.Timestamp
    update_time: timestamp_pb2.Timestamp
    end_time: timestamp_pb2.Timestamp
    state: State
    database_dump: DatabaseDump

    class State(enum.Enum):
        STATE_UNSPECIFIED = 0
        RUNNING = 1
        SUCCEEDED = 2
        UPDATING = 3
        FAILED = 4

    class DatabaseDump:
        gcs_uri: str
        database_type: DatabaseType
        type: Optional[str]  # Deprecated

        class DatabaseType(enum.Enum):
            DATABASE_TYPE_UNSPECIFIED = 0
            MYSQL = 1
            POSTGRESQL = 2
```

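The `State` enum makes it straightforward to branch on an import's outcome; a small sketch:

```python
from google.cloud import metastore

client = metastore.DataprocMetastoreClient()
parent = "projects/my-project/locations/us-central1/services/my-metastore"

for metadata_import in client.list_metadata_imports(parent=parent):
    state = metadata_import.state
    if state == metastore.MetadataImport.State.FAILED:
        print(f"Import {metadata_import.name} failed; consider re-importing")
    elif state == metastore.MetadataImport.State.RUNNING:
        print(f"Import {metadata_import.name} is still running")
```
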
### Metadata Export Resource

```python { .api }
class MetadataExport:
    destination_gcs_uri: str
    start_time: timestamp_pb2.Timestamp
    end_time: timestamp_pb2.Timestamp
    state: State
    database_dump_type: DatabaseDumpSpec.Type

    class State(enum.Enum):
        STATE_UNSPECIFIED = 0
        RUNNING = 1
        SUCCEEDED = 2
        FAILED = 3
        CANCELLED = 4
```

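In the underlying API, completed exports are surfaced on the service resource via its metadata management activity; a sketch, assuming that field layout:

```python
from google.cloud import metastore

client = metastore.DataprocMetastoreClient()
service = client.get_service(
    name="projects/my-project/locations/us-central1/services/my-metastore"
)

# metadata_management_activity.metadata_exports is assumed here; it lists
# recent MetadataExport records for the service
for export in service.metadata_management_activity.metadata_exports:
    print(f"{export.destination_gcs_uri}: {export.state.name}")
```
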
### Database Dump Specification

```python { .api }
class DatabaseDumpSpec:
    gcs_uri: str
    type: Type

    class Type(enum.Enum):
        TYPE_UNSPECIFIED = 0
        MYSQL = 1
        POSTGRESQL = 2
```

### Request/Response Types

```python { .api }
class ListMetadataImportsRequest:
    parent: str
    page_size: int
    page_token: str
    filter: str
    order_by: str

class ListMetadataImportsResponse:
    metadata_imports: List[MetadataImport]
    next_page_token: str
    unreachable: List[str]

class GetMetadataImportRequest:
    name: str

class CreateMetadataImportRequest:
    parent: str
    metadata_import_id: str
    metadata_import: MetadataImport
    request_id: str

class UpdateMetadataImportRequest:
    update_mask: field_mask_pb2.FieldMask
    metadata_import: MetadataImport
    request_id: str

class ExportMetadataRequest:
    service: str
    destination_gcs_uri: str
    request_id: str
    database_dump_type: DatabaseDumpSpec.Type
```

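The list methods page transparently, so `page_size` and `page_token` only matter when you want manual control; a sketch iterating page by page rather than item by item:

```python
from google.cloud import metastore

client = metastore.DataprocMetastoreClient()

request = metastore.ListMetadataImportsRequest(
    parent="projects/my-project/locations/us-central1/services/my-metastore",
    page_size=25,
)

# Each page is a ListMetadataImportsResponse
for page in client.list_metadata_imports(request=request).pages:
    print(f"Fetched {len(page.metadata_imports)} imports")
```
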
## Usage Patterns

### Large-Scale Migration Workflow

```python
import logging
import time
from typing import List

from google.cloud import metastore, storage

class MetastoreMigrator:
    def __init__(self, project_id: str, location: str, service_id: str):
        self.metastore_client = metastore.DataprocMetastoreClient()
        self.storage_client = storage.Client()
        self.service_name = f"projects/{project_id}/locations/{location}/services/{service_id}"

    def import_from_multiple_dumps(self, dump_uris: List[str]) -> List[str]:
        """Import metadata from multiple database dumps."""
        import_operations = []

        for i, dump_uri in enumerate(dump_uris):
            import_config = metastore.MetadataImport(
                description=f"Batch import {i+1} of {len(dump_uris)}",
                database_dump=metastore.MetadataImport.DatabaseDump(
                    gcs_uri=dump_uri,
                    database_type=metastore.MetadataImport.DatabaseDump.DatabaseType.MYSQL
                )
            )

            operation = self.metastore_client.create_metadata_import(
                parent=self.service_name,
                metadata_import_id=f"batch-import-{i+1:03d}",
                metadata_import=import_config
            )

            # The LRO name lives on the wrapped protobuf, operation.operation
            import_operations.append(operation.operation.name)
            logging.info(f"Started import {i+1}: {operation.operation.name}")

        return import_operations

    def wait_for_imports(self, operation_names: List[str]):
        """Wait for all import operations to complete."""
        completed = 0
        total = len(operation_names)

        while completed < total:
            for op_name in operation_names:
                # Check operation status; a possible implementation using the
                # operations client is sketched after this example.
                pass

            time.sleep(60)  # Check every minute
            logging.info(f"Import progress: {completed}/{total} completed")
```

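The `wait_for_imports` body above is a stub; one possible completion polls the long-running operations directly. A sketch, assuming the gRPC transport exposes an operations client as `transport.operations_client`:

```python
import logging
import time
from typing import List

def wait_for_imports(self, operation_names: List[str]):
    """Poll until every named operation reports done."""
    ops_client = self.metastore_client.transport.operations_client
    pending = set(operation_names)

    while pending:
        for op_name in list(pending):
            op = ops_client.get_operation(op_name)  # returns a longrunning Operation proto
            if op.done:
                pending.discard(op_name)
        completed = len(operation_names) - len(pending)
        logging.info(f"Import progress: {completed}/{len(operation_names)} completed")
        if pending:
            time.sleep(60)  # Check every minute
```
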
### Export with Validation

```python
import logging
from datetime import datetime

from google.cloud import metastore, storage

def export_with_validation(service_name: str, export_bucket: str):
    """Export metadata with validation steps."""
    client = metastore.DataprocMetastoreClient()

    # Create timestamped export location
    export_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    export_uri = f"gs://{export_bucket}/exports/{export_timestamp}/"

    # Start export
    export_request = metastore.ExportMetadataRequest(
        service=service_name,
        destination_gcs_uri=export_uri,
        database_dump_type=metastore.DatabaseDumpSpec.Type.MYSQL
    )

    operation = client.export_metadata(request=export_request)

    try:
        # Wait for export completion
        result = operation.result(timeout=3600)

        # Validate export files exist in Cloud Storage
        storage_client = storage.Client()
        bucket = storage_client.bucket(export_bucket)

        export_files = list(bucket.list_blobs(prefix=f"exports/{export_timestamp}/"))
        if not export_files:
            raise ValueError("Export completed but no files found in Cloud Storage")

        total_size = sum(blob.size for blob in export_files)
        logging.info(f"Export validated: {len(export_files)} files, {total_size} bytes")

        return export_uri

    except Exception as e:
        logging.error(f"Export failed or validation error: {e}")
        raise
```