CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-google-cloud-datacatalog

Google Cloud Datacatalog API client library for data discovery and metadata management

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/data-catalog.md

Data Catalog Management

Core catalog operations including search, entry groups, entries, and catalog configuration. This is the primary interface for discovering and managing metadata about data assets across Google Cloud services.

Capabilities

Catalog Search

Search for resources across the entire Data Catalog, supporting complex queries with filtering by resource type, properties, and tags.

def search_catalog(
    self, 
    request: SearchCatalogRequest = None,
    *, 
    scope: SearchCatalogRequest.Scope = None,
    query: str = None,
    **kwargs
) -> SearchCatalogPager:
    """
    Search Data Catalog for resources that match a query.

    The search parameters are given either as a prebuilt ``request`` or as
    the ``scope`` and ``query`` keyword arguments. ``page_size``,
    ``page_token`` and ``order_by`` have no keyword argument in this
    signature; they are fields of ``SearchCatalogRequest``, so set them on
    ``request``.

    Args:
        request: The request object containing search parameters.
        scope: Required. Search scope, expressed through
            ``include_org_ids``, ``include_project_ids`` or
            ``include_gcp_public_datasets``.
        query: Required. Search query using Data Catalog search syntax.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        SearchCatalogPager: Pager for iterating through SearchCatalogResult
        objects.

    Raises:
        google.api_core.exceptions.InvalidArgument: Invalid search query or scope.
        google.api_core.exceptions.PermissionDenied: Insufficient permissions.
    """

Usage Example:

from google.cloud import datacatalog_v1

client = datacatalog_v1.DataCatalogClient()

# Limit the search to a single organization.
org_scope = datacatalog_v1.SearchCatalogRequest.Scope(
    include_org_ids=["123456789"],
)

# Find tables whose name or description mentions "customer",
# fetching ten results per page.
customer_tables_request = datacatalog_v1.SearchCatalogRequest(
    scope=org_scope,
    query='type=table AND (name:customer OR description:customer)',
    page_size=10,
)

# Iterating the returned pager yields one search result at a time.
matches = client.search_catalog(request=customer_tables_request)
for match in matches:
    print(f"Found: {match.relative_resource_name}")
    print(f"Type: {match.search_result_type}")
    print(f"Display Name: {match.display_name}")

Entry Group Management

Entry groups provide logical containers for organizing related entries, typically grouping entries from the same data source or project.

def create_entry_group(
    self,
    request: CreateEntryGroupRequest = None,
    *,
    parent: str = None,
    entry_group_id: str = None,
    entry_group: EntryGroup = None,
    **kwargs
) -> EntryGroup:
    """
    Create an entry group.

    Args:
        request: The request object.
        parent: Required. Location that will contain the group, in the form
            ``projects/{project}/locations/{location}``.
        entry_group_id: Required. ID of the entry group to create.
        entry_group: Required. The entry group to create.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        EntryGroup: The created entry group.

    Raises:
        google.api_core.exceptions.AlreadyExists: Entry group already exists.
        google.api_core.exceptions.InvalidArgument: Invalid parameters.
    """

def get_entry_group(
    self,
    request: GetEntryGroupRequest = None,
    *,
    name: str = None,
    read_mask: field_mask_pb2.FieldMask = None,
    **kwargs
) -> EntryGroup:
    """
    Get an entry group.

    Args:
        request: The request object.
        name: Required. Entry group name, in the form
            ``projects/{project}/locations/{location}/entryGroups/{entry_group}``.
        read_mask: Optional. Fields to return.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        EntryGroup: The requested entry group.
    """

def update_entry_group(
    self,
    request: UpdateEntryGroupRequest = None,
    *,
    entry_group: EntryGroup = None,
    update_mask: field_mask_pb2.FieldMask = None,
    **kwargs
) -> EntryGroup:
    """
    Update an entry group.

    Args:
        request: The request object.
        entry_group: Required. The entry group carrying the updated values.
        update_mask: Optional. Fields to update.
            NOTE(review): behavior when the mask is omitted is not
            documented here -- confirm before relying on it.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        EntryGroup: The updated entry group.
    """

def delete_entry_group(
    self,
    request: DeleteEntryGroupRequest = None,
    *,
    name: str = None,
    force: bool = None,
    **kwargs
) -> None:
    """
    Delete an entry group.

    Args:
        request: The request object.
        name: Required. Entry group name, in the form
            ``projects/{project}/locations/{location}/entryGroups/{entry_group}``.
        force: Optional. Force delete even if the group contains entries.
            NOTE(review): confirm that the installed client accepts
            ``force`` as a keyword argument; it may only be settable as a
            field on ``request``.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).
    """

def list_entry_groups(
    self,
    request: ListEntryGroupsRequest = None,
    *,
    parent: str = None,
    page_size: int = None,
    page_token: str = None,
    **kwargs
) -> ListEntryGroupsPager:
    """
    List entry groups.

    Args:
        request: The request object.
        parent: Required. Location whose entry groups are listed, in the
            form ``projects/{project}/locations/{location}``.
        page_size: Optional. Maximum number of results per page.
        page_token: Optional. Token for pagination.
            NOTE(review): confirm that the installed client accepts
            ``page_size`` and ``page_token`` as keyword arguments; they may
            only be settable as fields on ``request``.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        ListEntryGroupsPager: Pager for iterating through EntryGroup objects.
    """

Entry Management

Entries represent individual data assets and their metadata, including schema information, business context, and system properties.

def create_entry(
    self,
    request: CreateEntryRequest = None,
    *,
    parent: str = None,
    entry_id: str = None,
    entry: Entry = None,
    **kwargs
) -> Entry:
    """
    Create an entry.

    Args:
        request: The request object.
        parent: Required. Entry group that will contain the entry, in the
            form
            ``projects/{project}/locations/{location}/entryGroups/{entry_group}``.
        entry_id: Required. ID of the entry to create.
        entry: Required. The entry to create.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        Entry: The created entry.
    """

def get_entry(
    self,
    request: GetEntryRequest = None,
    *,
    name: str = None,
    **kwargs
) -> Entry:
    """
    Get an entry.

    Args:
        request: The request object.
        name: Required. Entry name, in the form
            ``projects/{project}/locations/{location}/entryGroups/{entry_group}/entries/{entry}``.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        Entry: The requested entry.
    """

def update_entry(
    self,
    request: UpdateEntryRequest = None,
    *,
    entry: Entry = None,
    update_mask: field_mask_pb2.FieldMask = None,
    **kwargs
) -> Entry:
    """
    Update an entry.

    Args:
        request: The request object.
        entry: Required. The entry carrying the updated values.
        update_mask: Optional. Fields to update.
            NOTE(review): behavior when the mask is omitted is not
            documented here -- confirm before relying on it.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        Entry: The updated entry.
    """

def delete_entry(
    self,
    request: DeleteEntryRequest = None,
    *,
    name: str = None,
    **kwargs
) -> None:
    """
    Delete an entry.

    Args:
        request: The request object.
        name: Required. Entry name, in the form
            ``projects/{project}/locations/{location}/entryGroups/{entry_group}/entries/{entry}``.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).
    """

def list_entries(
    self,
    request: ListEntriesRequest = None,
    *,
    parent: str = None,
    page_size: int = None,
    page_token: str = None,
    read_mask: field_mask_pb2.FieldMask = None,
    **kwargs
) -> ListEntriesPager:
    """
    List entries.

    Args:
        request: The request object.
        parent: Required. Entry group whose entries are listed, in the form
            ``projects/{project}/locations/{location}/entryGroups/{entry_group}``.
        page_size: Optional. Maximum number of results per page.
        page_token: Optional. Token for pagination.
        read_mask: Optional. Fields to return.
            NOTE(review): confirm that the installed client accepts
            ``page_size``, ``page_token`` and ``read_mask`` as keyword
            arguments; they may only be settable as fields on ``request``.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        ListEntriesPager: Pager for iterating through Entry objects.
    """

def lookup_entry(
    self,
    request: LookupEntryRequest = None,
    *,
    linked_resource: str = None,
    sql_resource: str = None,
    fully_qualified_name: str = None,
    project: str = None,
    location: str = None,
    **kwargs
) -> Entry:
    """
    Get an entry by its target resource name.

    The target is identified by ``linked_resource``, ``sql_resource`` or
    ``fully_qualified_name`` (presumably exactly one of them -- confirm).
    ``project`` and ``location`` accompany a fully-qualified-name lookup.

    NOTE(review): confirm that the installed client accepts these fields as
    keyword arguments; they may only be settable as fields on ``request``,
    which is how the usage example passes them.

    Args:
        request: The request object.
        linked_resource: Optional. Full resource name.
        sql_resource: Optional. SQL name of the entry.
        fully_qualified_name: Optional. Fully qualified name.
        project: Optional. Project ID for a fully-qualified-name lookup.
        location: Optional. Location for a fully-qualified-name lookup.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        Entry: The entry for the target resource.

    Raises:
        google.api_core.exceptions.NotFound: Entry not found.
    """

Usage Example:

from google.cloud import datacatalog_v1

client = datacatalog_v1.DataCatalogClient()

# Create entry group.
# Entry group and entry IDs may contain only letters, digits and
# underscores, so use "customer_data" rather than "customer-data".
entry_group = datacatalog_v1.EntryGroup(
    display_name="Customer Data",
    description="Customer-related data assets",
)

create_group_request = datacatalog_v1.CreateEntryGroupRequest(
    parent="projects/my-project/locations/us-central1",
    entry_group_id="customer_data",
    entry_group=entry_group,
)

group = client.create_entry_group(request=create_group_request)

# Create entry.
# create_entry only accepts FILESET, CLUSTER, DATA_STREAM or user-specified
# (custom) types. Entries for integrated systems such as BigQuery are
# created by Data Catalog itself, so a custom entry describes the asset with
# user_specified_system / user_specified_type instead of EntryType.TABLE.
entry = datacatalog_v1.Entry(
    display_name="Customer Table",
    description="Main customer information table",
    user_specified_system="customer_db",
    user_specified_type="customer_table",
    linked_resource="//customer-db.example.com/tables/customers",
)

create_entry_request = datacatalog_v1.CreateEntryRequest(
    parent=group.name,
    entry_id="customer_table",
    entry=entry,
)

created_entry = client.create_entry(request=create_entry_request)

# Lookup entry by resource.
# BigQuery tables are cataloged automatically; look them up by their full
# resource name rather than creating an entry for them.
lookup_request = datacatalog_v1.LookupEntryRequest(
    linked_resource="//bigquery.googleapis.com/projects/my-project/datasets/customer/tables/customers"
)
found_entry = client.lookup_entry(request=lookup_request)

Configuration Management

Manage Data Catalog configuration at the organization level, including migration settings and UI experience options.

def set_config(
    self,
    request: SetConfigRequest = None,
    *,
    name: str = None,
    config: OrganizationConfig = None,
    **kwargs
) -> OrganizationConfig:
    """
    Set organization-level Data Catalog configuration.

    NOTE(review): verify the ``config`` field, the return type, and whether
    ``name``/``config`` are accepted as keyword arguments against the
    installed client version before relying on this signature.

    Args:
        request: The request object.
        name: Required. Configuration resource name, in the form
            ``organizations/{organization}/locations/{location}/config``.
        config: Required. Configuration to set.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        OrganizationConfig: The updated configuration.
    """

def retrieve_config(
    self,
    request: RetrieveConfigRequest = None,
    *,
    name: str = None,
    **kwargs
) -> OrganizationConfig:
    """
    Retrieve organization-level Data Catalog configuration.

    NOTE(review): confirm that the installed client accepts ``name`` as a
    keyword argument; it may only be settable as a field on ``request``.

    Args:
        request: The request object.
        name: Required. Configuration resource name, in the form
            ``organizations/{organization}/locations/{location}/config``.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        OrganizationConfig: The current configuration.
    """

def retrieve_effective_config(
    self,
    request: RetrieveEffectiveConfigRequest = None,
    *,
    name: str = None,
    **kwargs
) -> OrganizationConfig:
    """
    Retrieve effective organization-level Data Catalog configuration.

    Unlike ``retrieve_config``, the documented ``name`` format here is
    project-scoped rather than organization-scoped.

    NOTE(review): verify the return type and whether ``name`` is accepted
    as a keyword argument against the installed client version.

    Args:
        request: The request object.
        name: Required. Configuration resource name, in the form
            ``projects/{project}/locations/{location}/config``.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        OrganizationConfig: The effective configuration.
    """

IAM Policy Management

Manage Identity and Access Management (IAM) policies for Data Catalog resources, controlling who can access entries, entry groups, and tag templates.

def set_iam_policy(
    self,
    request: SetIamPolicyRequest = None,
    *,
    resource: str = None,
    **kwargs
) -> Policy:
    """
    Set the IAM policy for a resource.

    The policy to apply has no keyword argument in this signature; it is
    the ``policy`` field of ``SetIamPolicyRequest``, so pass it through
    ``request``.

    Args:
        request: The request object.
        resource: Required. Resource name (entry, entry group, or tag
            template).
            NOTE(review): confirm which resource kinds the service accepts
            for this call; setting a policy on an individual entry may not
            be supported.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        Policy: The updated IAM policy.

    Raises:
        google.api_core.exceptions.PermissionDenied: Insufficient permissions.
        google.api_core.exceptions.NotFound: Resource not found.
    """

def get_iam_policy(
    self,
    request: GetIamPolicyRequest = None,
    *,
    resource: str = None,
    **kwargs
) -> Policy:
    """
    Get the IAM policy for a resource.

    Args:
        request: The request object. Its ``options`` field carries optional
            policy retrieval options.
        resource: Required. Resource name (entry, entry group, or tag
            template).
            NOTE(review): confirm which resource kinds the service accepts
            for this call.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        Policy: The current IAM policy.
    """

def test_iam_permissions(
    self,
    request: TestIamPermissionsRequest = None,
    **kwargs
) -> TestIamPermissionsResponse:
    """
    Test IAM permissions on a resource.

    This signature has no per-field keyword arguments: the resource and the
    permissions to test are the ``resource`` and ``permissions`` fields of
    ``request``.

    Args:
        request: The request object containing resource and permissions to
            test.
        **kwargs: Additional keyword arguments, not described here
            (presumably per-call options such as retry and timeout --
            confirm against the client library).

    Returns:
        TestIamPermissionsResponse: Contains the list of permissions the
        caller has.
    """

Usage Example:

from google.cloud import datacatalog_v1
from google.iam.v1 import iam_policy_pb2, policy_pb2

client = datacatalog_v1.DataCatalogClient()

# Data Catalog IAM policies are set on entry groups and tag templates, not
# on individual entries, so the policy targets the entry group. IDs use
# underscores because hyphens are not valid in entry group IDs.
entry_group_name = "projects/my-project/locations/us-central1/entryGroups/my_group"

# Set IAM policy on the entry group.
# WARNING: set_iam_policy replaces any existing policy on the resource. To
# add a binding without dropping the current ones, fetch the policy with
# get_iam_policy first, modify it, and send the modified policy back.
policy = policy_pb2.Policy(
    bindings=[
        policy_pb2.Binding(
            role="roles/datacatalog.viewer",
            members=["user:analyst@company.com"],
        )
    ]
)

set_request = iam_policy_pb2.SetIamPolicyRequest(
    resource=entry_group_name,
    policy=policy,
)

updated_policy = client.set_iam_policy(request=set_request)

# Test which of the listed permissions the caller holds on the entry group.
test_request = iam_policy_pb2.TestIamPermissionsRequest(
    resource=entry_group_name,
    permissions=["datacatalog.entryGroups.get", "datacatalog.entryGroups.update"],
)

permissions_response = client.test_iam_permissions(request=test_request)
print(f"Allowed permissions: {permissions_response.permissions}")

Request Types

class SearchCatalogRequest:
    """Request object accepted by ``search_catalog``."""

    scope: SearchCatalogRequest.Scope  # Required. Where to search (see Scope below)
    query: str  # Required. Search query in Data Catalog search syntax
    page_size: int  # Optional. Page size
    page_token: str  # Optional. Pagination token
    order_by: str  # Optional. Sort order of the results

    class Scope:
        """Limits which resources a catalog search covers."""

        include_org_ids: Sequence[str]  # Organization IDs to include
        include_project_ids: Sequence[str]  # Project IDs to include
        include_gcp_public_datasets: bool  # Include public datasets
        restricted_locations: Sequence[str]  # Restrict the search to these locations
        starred_only: bool  # Only starred entries
        include_public_tag_templates: bool  # Include public tag templates

class CreateEntryGroupRequest:
    """Request object accepted by ``create_entry_group``."""

    parent: str  # Required. Parent location: projects/{project}/locations/{location}
    entry_group_id: str  # Required. ID of the entry group to create
    entry_group: EntryGroup  # Required. The entry group to create

class CreateEntryRequest:  
    """Request object accepted by ``create_entry``."""

    parent: str  # Required. Parent entry group: projects/{project}/locations/{location}/entryGroups/{entry_group}
    entry_id: str  # Required. ID of the entry to create
    entry: Entry  # Required. The entry to create

class LookupEntryRequest:
    """Request object accepted by ``lookup_entry``.

    The target is named by ``linked_resource``, ``sql_resource`` or
    ``fully_qualified_name`` (presumably exactly one of them -- confirm).
    """

    linked_resource: str  # Optional. Full name of the linked resource
    sql_resource: str  # Optional. SQL name of the resource
    fully_qualified_name: str  # Optional. Fully qualified name (FQN)
    project: str  # Optional. Project ID, used for FQN lookup
    location: str  # Optional. Location, used for FQN lookup

class SetIamPolicyRequest:
    """Request object accepted by ``set_iam_policy``."""

    resource: str  # Required. Name of the resource the policy applies to
    policy: Policy  # Required. IAM policy to set

class GetIamPolicyRequest:
    """Request object accepted by ``get_iam_policy``."""

    resource: str  # Required. Name of the resource whose policy is read
    options: GetPolicyOptions  # Optional. Policy retrieval options

class TestIamPermissionsRequest:
    """Request object accepted by ``test_iam_permissions``."""

    resource: str  # Required. Name of the resource to test against
    permissions: Sequence[str]  # Required. Permissions to test

Response Types

class SearchCatalogResponse:
    """Response to a catalog search.

    Presumably one page of the results that ``SearchCatalogPager`` iterates
    over -- confirm against the client library.
    """

    results: Sequence[SearchCatalogResult]  # Search results
    next_page_token: str  # Token for the next page
    unreachable: Sequence[str]  # Locations that could not be reached

class ListEntryGroupsResponse:
    """Response to an entry group listing.

    Presumably one page of the results that ``ListEntryGroupsPager``
    iterates over -- confirm against the client library.
    """

    entry_groups: Sequence[EntryGroup]  # Entry groups
    next_page_token: str  # Token for the next page

class ListEntriesResponse:
    """Response to an entry listing.

    Presumably one page of the results that ``ListEntriesPager`` iterates
    over -- confirm against the client library.
    """

    entries: Sequence[Entry]  # Entries
    next_page_token: str  # Token for the next page

class TestIamPermissionsResponse:
    """Response object returned by ``test_iam_permissions``."""

    permissions: Sequence[str]  # Permissions the caller has on the resource

Install with Tessl CLI

npx tessl i tessl/pypi-google-cloud-datacatalog

docs

bulk-operations.md

data-catalog.md

entry-metadata.md

index.md

policy-tags.md

tags.md

taxonomy-serialization.md

tile.json