
# Metadata Import and Export

Import metadata from external sources and export metastore data to Google Cloud Storage. MySQL and PostgreSQL database dumps are supported; imports and exports run as long-running operations whose progress and errors can be tracked, making them suitable for large-scale data migrations.

## Capabilities

### List Metadata Imports

Retrieve all metadata import operations for a metastore service, with filtering and pagination support.

```python { .api }
def list_metadata_imports(
    self,
    request: Optional[ListMetadataImportsRequest] = None,
    *,
    parent: Optional[str] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, str]] = ()
) -> pagers.ListMetadataImportsPager:
    """
    Lists imports in a service.

    Args:
        request: The request object containing list parameters
        parent: Required. The relative resource name of the service
            Format: projects/{project_id}/locations/{location_id}/services/{service_id}
        retry: Retry configuration for the request
        timeout: Request timeout in seconds
        metadata: Additional metadata for the request

    Returns:
        ListMetadataImportsPager: Pageable list of metadata imports

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request fails
    """
```

Usage example:

```python
from google.cloud import metastore

client = metastore.DataprocMetastoreClient()
parent = "projects/my-project/locations/us-central1/services/my-metastore"

# List all imports
for metadata_import in client.list_metadata_imports(parent=parent):
    print(f"Import: {metadata_import.name}")
    print(f"State: {metadata_import.state.name}")
    print(f"Database dump: {metadata_import.database_dump.gcs_uri}")

# Filter by state and order newest first
request = metastore.ListMetadataImportsRequest(
    parent=parent,
    filter="state=SUCCEEDED",
    order_by="create_time desc"
)
for metadata_import in client.list_metadata_imports(request=request):
    print(f"Succeeded import: {metadata_import.name}")
```

### Get Metadata Import

Retrieve detailed information about a specific metadata import operation, including progress and error details.

```python { .api }
def get_metadata_import(
    self,
    request: Optional[GetMetadataImportRequest] = None,
    *,
    name: Optional[str] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, str]] = ()
) -> MetadataImport:
    """
    Gets details of a single import.

    Args:
        request: The request object
        name: Required. The relative resource name of the metadata import
            Format: projects/{project_id}/locations/{location_id}/services/{service_id}/metadataImports/{import_id}
        retry: Retry configuration
        timeout: Request timeout in seconds
        metadata: Additional metadata

    Returns:
        MetadataImport: The metadata import resource

    Raises:
        google.api_core.exceptions.NotFound: If the import doesn't exist
    """
```
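
Usage example (a minimal sketch; the project, location, service, and import IDs are placeholders):

```python
from google.cloud import metastore

client = metastore.DataprocMetastoreClient()

# Placeholder resource name following the format documented above.
name = (
    "projects/my-project/locations/us-central1"
    "/services/my-metastore/metadataImports/mysql-import-20240115"
)

metadata_import = client.get_metadata_import(name=name)
print(f"State: {metadata_import.state.name}")
print(f"Created: {metadata_import.create_time}")
```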

### Create Metadata Import

Import metadata from external database dumps stored in Google Cloud Storage.

```python { .api }
def create_metadata_import(
    self,
    request: Optional[CreateMetadataImportRequest] = None,
    *,
    parent: Optional[str] = None,
    metadata_import: Optional[MetadataImport] = None,
    metadata_import_id: Optional[str] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, str]] = ()
) -> operation.Operation:
    """
    Creates a new MetadataImport in a given project and location.

    Args:
        request: The request object
        parent: Required. The relative resource name of the service
        metadata_import: Required. The metadata import configuration
        metadata_import_id: Required. The ID to use for the import
        retry: Retry configuration
        timeout: Request timeout in seconds
        metadata: Additional metadata

    Returns:
        Operation: Long-running operation for metadata import

    Raises:
        google.api_core.exceptions.AlreadyExists: If import_id already exists
        google.api_core.exceptions.InvalidArgument: If configuration is invalid
    """
```

Usage example:

```python
from google.cloud import metastore

client = metastore.DataprocMetastoreClient()

# Import from MySQL dump
import_config = metastore.MetadataImport(
    description="Import production MySQL metastore data",
    database_dump=metastore.MetadataImport.DatabaseDump(
        gcs_uri="gs://my-bucket/metastore-dumps/prod-metastore-20240115.sql",
        database_type=metastore.MetadataImport.DatabaseDump.DatabaseType.MYSQL
    )
)

operation = client.create_metadata_import(
    parent="projects/my-project/locations/us-central1/services/my-metastore",
    metadata_import_id="mysql-import-20240115",
    metadata_import=import_config
)

# Monitor import progress
print("Starting metadata import...")
result = operation.result(timeout=7200)  # Can take up to 2 hours for large dumps
print(f"Import completed: {result.name}")
```
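
`create_metadata_import` can raise `AlreadyExists` or `InvalidArgument` (see the signature above), so callers may want to guard the call. A sketch, reusing `client` and `import_config` from the previous example:

```python
from google.api_core import exceptions

try:
    operation = client.create_metadata_import(
        parent="projects/my-project/locations/us-central1/services/my-metastore",
        metadata_import_id="mysql-import-20240115",
        metadata_import=import_config
    )
    operation.result(timeout=7200)
except exceptions.AlreadyExists:
    # The ID is already taken within this service; choose a unique one.
    print("An import with this ID already exists")
except exceptions.InvalidArgument as e:
    # For example, a malformed gcs_uri or an unsupported database type.
    print(f"Invalid import configuration: {e}")
```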

### Update Metadata Import

Update a metadata import's mutable configuration, such as its description.

```python { .api }
def update_metadata_import(
    self,
    request: Optional[UpdateMetadataImportRequest] = None,
    *,
    metadata_import: Optional[MetadataImport] = None,
    update_mask: Optional[field_mask_pb2.FieldMask] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, str]] = ()
) -> operation.Operation:
    """
    Updates a single import.

    Args:
        request: The request object
        metadata_import: Required. The import to update
        update_mask: Required. Field mask specifying which fields to update
        retry: Retry configuration
        timeout: Request timeout in seconds
        metadata: Additional metadata

    Returns:
        Operation: Long-running operation for import update

    Raises:
        google.api_core.exceptions.NotFound: If the import doesn't exist
    """
```
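
Usage example (a sketch, assuming an existing import; the `FieldMask` restricts the update to the `description` field):

```python
from google.cloud import metastore
from google.protobuf import field_mask_pb2

client = metastore.DataprocMetastoreClient()

# Placeholder import; the name format matches get_metadata_import above.
metadata_import = metastore.MetadataImport(
    name=(
        "projects/my-project/locations/us-central1"
        "/services/my-metastore/metadataImports/mysql-import-20240115"
    ),
    description="Refreshed description for the production import",
)

operation = client.update_metadata_import(
    metadata_import=metadata_import,
    update_mask=field_mask_pb2.FieldMask(paths=["description"]),
)
updated = operation.result(timeout=600)
print(f"Updated import: {updated.name}")
```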

### Export Metadata

Export metastore metadata to Google Cloud Storage in various formats.

```python { .api }
def export_metadata(
    self,
    request: Optional[ExportMetadataRequest] = None,
    *,
    service: Optional[str] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, str]] = ()
) -> operation.Operation:
    """
    Exports metadata from a service.

    Args:
        request: The request object
        service: Required. The relative resource name of the service
        retry: Retry configuration
        timeout: Request timeout in seconds
        metadata: Additional metadata

    Returns:
        Operation: Long-running operation for metadata export

    Raises:
        google.api_core.exceptions.NotFound: If the service doesn't exist
        google.api_core.exceptions.FailedPrecondition: If export cannot be performed
    """
```

Usage example:

```python
from google.cloud import metastore

client = metastore.DataprocMetastoreClient()

# Export to Cloud Storage
export_request = metastore.ExportMetadataRequest(
    service="projects/my-project/locations/us-central1/services/my-metastore",
    destination_gcs_uri="gs://my-exports/metastore-export-20240115/",
    database_dump_type=metastore.DatabaseDumpSpec.Type.MYSQL
)

operation = client.export_metadata(request=export_request)

# Wait for export completion
print("Starting metadata export...")
metadata_export = operation.result(timeout=3600)
print(f"Export completed to: {metadata_export.destination_gcs_uri}")
```

## Core Types

### Metadata Import Resource

```python { .api }
class MetadataImport:
    name: str
    description: str
    create_time: timestamp_pb2.Timestamp
    update_time: timestamp_pb2.Timestamp
    end_time: timestamp_pb2.Timestamp
    state: State
    database_dump: DatabaseDump

    class State(enum.Enum):
        STATE_UNSPECIFIED = 0
        RUNNING = 1
        SUCCEEDED = 2
        UPDATING = 3
        FAILED = 4

    class DatabaseDump:
        gcs_uri: str
        database_type: DatabaseType
        type: Optional[str]  # Deprecated

        class DatabaseType(enum.Enum):
            DATABASE_TYPE_UNSPECIFIED = 0
            MYSQL = 1
            POSTGRESQL = 2
```
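
The `state` field drives most monitoring logic. A small sketch of branching on it, assuming the `client` and `parent` variables from the listing example earlier:

```python
for metadata_import in client.list_metadata_imports(parent=parent):
    state = metadata_import.state
    if state == metastore.MetadataImport.State.SUCCEEDED:
        print(f"{metadata_import.name}: finished at {metadata_import.end_time}")
    elif state == metastore.MetadataImport.State.FAILED:
        print(f"{metadata_import.name}: failed")
    else:
        # RUNNING or UPDATING: still in progress.
        print(f"{metadata_import.name}: in progress")
```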

### Metadata Export Resource

```python { .api }
class MetadataExport:
    destination_gcs_uri: str
    start_time: timestamp_pb2.Timestamp
    end_time: timestamp_pb2.Timestamp
    state: State
    database_dump_type: DatabaseDumpSpec.Type

    class State(enum.Enum):
        STATE_UNSPECIFIED = 0
        RUNNING = 1
        SUCCEEDED = 2
        FAILED = 3
        CANCELLED = 4
```

### Database Dump Specification

```python { .api }
class DatabaseDumpSpec:
    gcs_uri: str
    type: Type

    class Type(enum.Enum):
        TYPE_UNSPECIFIED = 0
        MYSQL = 1
        POSTGRESQL = 2
```

### Request/Response Types

```python { .api }
class ListMetadataImportsRequest:
    parent: str
    page_size: int
    page_token: str
    filter: str
    order_by: str

class ListMetadataImportsResponse:
    metadata_imports: List[MetadataImport]
    next_page_token: str
    unreachable: List[str]

class GetMetadataImportRequest:
    name: str

class CreateMetadataImportRequest:
    parent: str
    metadata_import_id: str
    metadata_import: MetadataImport
    request_id: str

class UpdateMetadataImportRequest:
    update_mask: field_mask_pb2.FieldMask
    metadata_import: MetadataImport
    request_id: str

class ExportMetadataRequest:
    service: str
    destination_gcs_uri: str
    request_id: str
    database_dump_type: DatabaseDumpSpec.Type
```
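
The pager returned by `list_metadata_imports` handles `page_token` automatically, but pages can also be walked explicitly via the pager's `pages` iterator. A sketch, assuming the same `client` and `parent` as in the earlier examples:

```python
pager = client.list_metadata_imports(
    request=metastore.ListMetadataImportsRequest(parent=parent, page_size=50)
)

for page in pager.pages:
    for metadata_import in page.metadata_imports:
        print(metadata_import.name)
    # Locations that could not be queried, per ListMetadataImportsResponse.
    if page.unreachable:
        print(f"Unreachable: {page.unreachable}")
```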

## Usage Patterns

### Large-Scale Migration Workflow

```python
import logging
from typing import List

from google.api_core import operation
from google.cloud import metastore, storage

class MetastoreMigrator:
    def __init__(self, project_id: str, location: str, service_id: str):
        self.metastore_client = metastore.DataprocMetastoreClient()
        self.storage_client = storage.Client()
        self.service_name = f"projects/{project_id}/locations/{location}/services/{service_id}"

    def import_from_multiple_dumps(self, dump_uris: List[str]) -> List[operation.Operation]:
        """Start a metadata import for each database dump."""
        import_operations = []

        for i, dump_uri in enumerate(dump_uris):
            import_config = metastore.MetadataImport(
                description=f"Batch import {i + 1} of {len(dump_uris)}",
                database_dump=metastore.MetadataImport.DatabaseDump(
                    gcs_uri=dump_uri,
                    database_type=metastore.MetadataImport.DatabaseDump.DatabaseType.MYSQL
                )
            )

            op = self.metastore_client.create_metadata_import(
                parent=self.service_name,
                metadata_import_id=f"batch-import-{i + 1:03d}",
                metadata_import=import_config
            )

            import_operations.append(op)
            logging.info(f"Started import {i + 1} for {dump_uri}")

        return import_operations

    def wait_for_imports(self, operations: List[operation.Operation]):
        """Wait for all import operations to complete."""
        total = len(operations)

        for completed, op in enumerate(operations, start=1):
            # result() blocks until the long-running operation finishes
            # and raises if the import failed.
            op.result(timeout=7200)
            logging.info(f"Import progress: {completed}/{total} completed")
```
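
A possible invocation of the helper above (project, service, and dump URIs are placeholders):

```python
migrator = MetastoreMigrator("my-project", "us-central1", "my-metastore")

ops = migrator.import_from_multiple_dumps([
    "gs://my-bucket/metastore-dumps/part-001.sql",
    "gs://my-bucket/metastore-dumps/part-002.sql",
])
migrator.wait_for_imports(ops)
```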

### Export with Validation

```python
import logging
from datetime import datetime

from google.cloud import metastore, storage

def export_with_validation(service_name: str, export_bucket: str):
    """Export metadata with validation steps."""
    client = metastore.DataprocMetastoreClient()

    # Create timestamped export location
    export_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    export_uri = f"gs://{export_bucket}/exports/{export_timestamp}/"

    # Start export
    export_request = metastore.ExportMetadataRequest(
        service=service_name,
        destination_gcs_uri=export_uri,
        database_dump_type=metastore.DatabaseDumpSpec.Type.MYSQL
    )

    operation = client.export_metadata(request=export_request)

    try:
        # Wait for export completion; raises on failure.
        operation.result(timeout=3600)

        # Validate export files exist in Cloud Storage
        storage_client = storage.Client()
        bucket = storage_client.bucket(export_bucket)

        export_files = list(bucket.list_blobs(prefix=f"exports/{export_timestamp}/"))
        if not export_files:
            raise ValueError("Export completed but no files found in Cloud Storage")

        total_size = sum(blob.size for blob in export_files)
        logging.info(f"Export validated: {len(export_files)} files, {total_size} bytes")

        return export_uri

    except Exception as e:
        logging.error(f"Export failed or validation error: {e}")
        raise
```