Google Cloud Dataproc Metastore API client library for managing fully managed, highly available metastore services
npx @tessl/cli install tessl/pypi-google-cloud-dataproc-metastore@1.19.0

A Python client library for Google Cloud Dataproc Metastore, a fully managed, highly available, autoscaled, autohealing, OSS-native metastore service that greatly simplifies technical metadata management. Built on Apache Hive metastore, it serves as a critical component for enterprise data lakes.
pip install google-cloud-dataproc-metastore

from google.cloud import metastore

Version-specific imports:
from google.cloud import metastore_v1
from google.cloud import metastore_v1alpha
from google.cloud import metastore_v1beta

from google.cloud import metastore
# Initialize the client
client = metastore.DataprocMetastoreClient()
# List all metastore services in a location
parent = "projects/my-project/locations/us-central1"
services = client.list_services(parent=parent)
for service in services:
    print(f"Service: {service.name}")
    print(f"State: {service.state}")
    print(f"Endpoint URI: {service.endpoint_uri}")
# Get a specific service
service_name = "projects/my-project/locations/us-central1/services/my-metastore"
service = client.get_service(name=service_name)
print(f"Service tier: {service.tier}")
print(f"Hive version: {service.hive_metastore_config.version}")
# Create a new backup
backup_request = metastore.CreateBackupRequest(
parent="projects/my-project/locations/us-central1/services/my-metastore",
backup_id="my-backup",
backup=metastore.Backup(
description="Automated backup for disaster recovery"
)
)
operation = client.create_backup(request=backup_request)
backup = operation.result() # Wait for completion
print(f"Backup created: {backup.name}")

The Google Cloud Dataproc Metastore client library follows Google's standard client library patterns:
Comprehensive lifecycle management for Dataproc Metastore services including creation, configuration, updates, and deletion. Supports multiple service tiers and Hive metastore versions with advanced networking and security options.
class DataprocMetastoreClient:
def list_services(self, request=None, *, parent=None, **kwargs): ...
def get_service(self, request=None, *, name=None, **kwargs): ...
def create_service(self, request=None, *, parent=None, service=None, service_id=None, **kwargs): ...
def update_service(self, request=None, *, service=None, update_mask=None, **kwargs): ...
def delete_service(self, request=None, *, name=None, **kwargs): ...

Complete backup and restore functionality for metastore services including scheduled backups, point-in-time recovery, and cross-region backup management for disaster recovery scenarios.
class DataprocMetastoreClient:
def list_backups(self, request=None, *, parent=None, **kwargs): ...
def get_backup(self, request=None, *, name=None, **kwargs): ...
def create_backup(self, request=None, *, parent=None, backup=None, backup_id=None, **kwargs): ...
def delete_backup(self, request=None, *, name=None, **kwargs): ...
def restore_service(self, request=None, *, service=None, backup=None, **kwargs): ...

Import metadata from external sources and export metastore data to Google Cloud Storage. Supports various database formats including MySQL and PostgreSQL dumps with comprehensive validation and error handling.
class DataprocMetastoreClient:
def list_metadata_imports(self, request=None, *, parent=None, **kwargs): ...
def get_metadata_import(self, request=None, *, name=None, **kwargs): ...
def create_metadata_import(self, request=None, *, parent=None, metadata_import=None, metadata_import_id=None, **kwargs): ...
def update_metadata_import(self, request=None, *, metadata_import=None, update_mask=None, **kwargs): ...
def export_metadata(self, request=None, *, service=None, **kwargs): ...

Manage metastore federation services that provide unified access to multiple backend metastores. Supports cross-cloud and multi-region federation scenarios for enterprise data lake architectures.
class DataprocMetastoreFederationClient:
def list_federations(self, request=None, *, parent=None, **kwargs): ...
def get_federation(self, request=None, *, name=None, **kwargs): ...
def create_federation(self, request=None, *, parent=None, federation=None, federation_id=None, **kwargs): ...
def update_federation(self, request=None, *, federation=None, update_mask=None, **kwargs): ...
def delete_federation(self, request=None, *, name=None, **kwargs): ...

Execute Hive and Spark SQL queries directly against metastore metadata for advanced analytics and metadata management operations including table movement and resource location management.
class DataprocMetastoreClient:
def query_metadata(self, request=None, *, service=None, query=None, **kwargs): ...
def move_table_to_database(self, request=None, *, service=None, table_name=None, db_name=None, destination_db_name=None, **kwargs): ...
def alter_metadata_resource_location(self, request=None, *, service=None, resource_name=None, location_uri=None, **kwargs): ...

Asynchronous client implementations for all operations with full async/await support, enabling high-performance concurrent operations and integration with async Python frameworks.
class DataprocMetastoreAsyncClient:
async def list_services(self, request=None, *, parent=None, **kwargs): ...
async def get_service(self, request=None, *, name=None, **kwargs): ...
async def create_service(self, request=None, *, parent=None, service=None, service_id=None, **kwargs): ...
# ... all methods have async equivalents

# Service states
class Service:
class State(enum.Enum):
CREATING = 1
ACTIVE = 2
SUSPENDING = 3
SUSPENDED = 4
UPDATING = 5
DELETING = 6
ERROR = 7
class Tier(enum.Enum):
DEVELOPER = 1
ENTERPRISE = 3
class ReleaseChannel(enum.Enum):
CANARY = 1
STABLE = 2
# Configuration classes
class HiveMetastoreConfig:
version: str
config_overrides: Dict[str, str]
kerberos_config: Optional[KerberosConfig]
auxiliary_versions: List[AuxiliaryVersionConfig]
class NetworkConfig:
consumers: List[NetworkConsumer]
enable_private_ip: bool
class EncryptionConfig:
kms_key: str
# Resource path helpers
def service_path(project: str, location: str, service: str) -> str: ...
def backup_path(project: str, location: str, service: str, backup: str) -> str: ...
def federation_path(project: str, location: str, federation: str) -> str: ...