tessl/pypi-google-cloud-dlp

Google Cloud Data Loss Prevention (DLP) API client library for discovering, classifying, and protecting sensitive data

—

Pending

Overview

Eval results

Files

Content Analysis

Name: tessl/pypi-google-cloud-dlp
Author: tessl

Real-time analysis of text and images to detect, redact, and transform sensitive information. Content analysis operations process data immediately and return results synchronously, making them ideal for interactive applications and small-scale data processing.

Capabilities

Content Inspection

Analyzes text content to identify sensitive information using built-in and custom detectors. Supports various content formats including plain text, structured tables, and document metadata.

def inspect_content(
    request: dlp.InspectContentRequest,
    *,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.InspectContentResponse:
    """
    Finds potentially sensitive info in content.

    The content is analyzed immediately and the findings are returned
    synchronously in the response.

    Args:
        request: InspectContentRequest carrying the content to scan
            (``item``), the detection settings (``inspect_config``) and the
            resource the call is issued against (``parent``).
        retry: Retry configuration for failed requests. Keyword-only;
            defaults to ``gapic_v1.method.DEFAULT``.
        timeout: Timeout for the request. Keyword-only; defaults to
            ``gapic_v1.method.DEFAULT``.
        metadata: Request metadata as a sequence of ``(key, value)`` pairs
            whose values are ``str`` or ``bytes``. Defaults to an empty
            tuple.

    Returns:
        InspectContentResponse with detected findings, available as
        ``response.result.findings``.
    """

Usage Example

from google.cloud import dlp

client = dlp.DlpServiceClient()
parent = f"projects/{project_id}/locations/global"

# Detectors to run. Findings below the POSSIBLE likelihood are filtered out
# by min_likelihood, and include_quote asks for the matched text itself.
detector_names = ("PHONE_NUMBER", "EMAIL_ADDRESS", "CREDIT_CARD_NUMBER")
inspect_config = dlp.InspectConfig(
    info_types=[dlp.InfoType(name=detector) for detector in detector_names],
    min_likelihood=dlp.Likelihood.POSSIBLE,
    include_quote=True,
)

# Bundle the target resource, the detection settings and the text to scan.
request = dlp.InspectContentRequest(
    parent=parent,
    inspect_config=inspect_config,
    item=dlp.ContentItem(
        value="Contact John at john@example.com or 555-123-4567"
    ),
)

response = client.inspect_content(request=request)

# Report every finding: its info type, the quoted match and the likelihood.
for match in response.result.findings:
    print(f"Found {match.info_type.name}: {match.quote}")
    print(f"Likelihood: {match.likelihood}")

Content De-identification

Transforms sensitive information in content using various techniques including masking, encryption, tokenization, and bucketing. Supports both reversible and irreversible transformations.

def deidentify_content(
    request: dlp.DeidentifyContentRequest,
    *,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.DeidentifyContentResponse:
    """
    De-identifies potentially sensitive info from a ContentItem.

    Args:
        request: DeidentifyContentRequest carrying the content to transform
            (``item``), the transformation settings (``deidentify_config``)
            and the resource the call is issued against (``parent``).
        retry: Retry configuration for failed requests. Keyword-only;
            defaults to ``gapic_v1.method.DEFAULT``.
        timeout: Timeout for the request. Keyword-only; defaults to
            ``gapic_v1.method.DEFAULT``.
        metadata: Request metadata as a sequence of ``(key, value)`` pairs
            whose values are ``str`` or ``bytes``. Defaults to an empty
            tuple.

    Returns:
        DeidentifyContentResponse with transformed content; for text input
        the transformed string is read from ``response.item.value``.
    """

Usage Example

from google.cloud import dlp

client = dlp.DlpServiceClient()
parent = f"projects/{project_id}/locations/global"

# Step 1: the transformation itself - character masking with "*",
# configured with number_to_mask=5.
mask_transformation = dlp.PrimitiveTransformation(
    character_mask_config=dlp.CharacterMaskConfig(
        masking_character="*",
        number_to_mask=5,
    )
)

# Step 2: bind that transformation to e-mail address findings only.
email_rule = dlp.InfoTypeTransformations.InfoTypeTransformation(
    info_types=[dlp.InfoType(name="EMAIL_ADDRESS")],
    primitive_transformation=mask_transformation,
)

# Step 3: wrap the rule in the de-identification configuration.
deidentify_config = dlp.DeidentifyConfig(
    info_type_transformations=dlp.InfoTypeTransformations(
        transformations=[email_rule]
    )
)

# Send the text together with the configuration.
request = dlp.DeidentifyContentRequest(
    parent=parent,
    deidentify_config=deidentify_config,
    item=dlp.ContentItem(
        value="Contact support at support@example.com"
    ),
)

response = client.deidentify_content(request=request)
print(f"De-identified: {response.item.value}")

Content Re-identification

Reverses de-identification transformations to restore original sensitive values. Only works with reversible transformations like deterministic encryption or tokenization.

def reidentify_content(
    request: dlp.ReidentifyContentRequest,
    *,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.ReidentifyContentResponse:
    """
    Re-identifies content that has been de-identified.

    Args:
        request: ReidentifyContentRequest carrying the de-identified content
            (``item``), the settings describing the transformation to
            reverse (``reidentify_config``) and the resource the call is
            issued against (``parent``).
        retry: Retry configuration for failed requests. Keyword-only;
            defaults to ``gapic_v1.method.DEFAULT``.
        timeout: Timeout for the request. Keyword-only; defaults to
            ``gapic_v1.method.DEFAULT``.
        metadata: Request metadata as a sequence of ``(key, value)`` pairs
            whose values are ``str`` or ``bytes``. Defaults to an empty
            tuple.

    Returns:
        ReidentifyContentResponse with original content restored, exposed
        through its ``item`` field.
    """

Image Redaction

Redacts sensitive information from images by detecting and obscuring text within image files. Supports various image formats and redaction methods.

def redact_image(
    request: dlp.RedactImageRequest,
    *,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.RedactImageResponse:
    """
    Redacts potentially sensitive info from an image.

    Args:
        request: RedactImageRequest carrying the image bytes
            (``byte_item``), the detection settings (``inspect_config``),
            the redaction settings (``image_redaction_configs``) and the
            resource the call is issued against (``parent``).
        retry: Retry configuration for failed requests. Keyword-only;
            defaults to ``gapic_v1.method.DEFAULT``.
        timeout: Timeout for the request. Keyword-only; defaults to
            ``gapic_v1.method.DEFAULT``.
        metadata: Request metadata as a sequence of ``(key, value)`` pairs
            whose values are ``str`` or ``bytes``. Defaults to an empty
            tuple.

    Returns:
        RedactImageResponse with redacted image; the image bytes are read
        from ``response.redacted_image``.
    """

Usage Example

from google.cloud import dlp

client = dlp.DlpServiceClient()
parent = f"projects/{project_id}/locations/global"

# Read the source image as raw bytes.
with open("document.png", "rb") as f:
    image_data = f.read()

# Configure detection: look for phone numbers in the image.
inspect_config = dlp.InspectConfig(
    info_types=[dlp.InfoType(name="PHONE_NUMBER")]
)

# Configure redaction: cover phone-number findings with a gray box.
# `info_type` and `redact_all_text` are alternative targets of a single
# redaction config (a oneof in the DLP API), so only `info_type` is set.
# Also assigning `redact_all_text` - even to False - would select that
# member instead and drop the `info_type` target.
image_redaction_config = dlp.RedactImageRequest.ImageRedactionConfig(
    info_type=dlp.InfoType(name="PHONE_NUMBER"),
    redaction_color=dlp.Color(red=0.5, green=0.5, blue=0.5),
)

request = dlp.RedactImageRequest(
    parent=parent,
    byte_item=dlp.ByteContentItem(
        # Declare the payload format so the bytes are decoded as a PNG image.
        type_=dlp.ByteContentItem.BytesType.IMAGE_PNG,
        data=image_data,
    ),
    inspect_config=inspect_config,
    image_redaction_configs=[image_redaction_config],
)

response = client.redact_image(request=request)

# Save the redacted image returned by the service.
with open("redacted_document.png", "wb") as f:
    f.write(response.redacted_image)

Info Type Discovery

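Lists the sensitive information types (infoTypes) that the DLP API supports, i.e. the built-in detectors that can be referenced by name in an inspection configuration. Custom stored info types are managed separately and are not part of this list.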

def list_info_types(
    request: Optional[dlp.ListInfoTypesRequest] = None,
    *,
    parent: Optional[str] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.ListInfoTypesResponse:
    """
    Returns a list of the sensitive information types that the DLP API supports.

    The call can be made either with a fully built ``request`` object or with
    the flattened ``parent`` keyword alone, which is why ``request`` is
    optional. Supply one or the other; the generated client is documented to
    reject a call that sets both.

    Args:
        request: ListInfoTypesRequest, or None when ``parent`` is given
            instead.
        parent: Parent resource name (format: locations/{location_id}),
            for example ``"locations/global"``.
        retry: Retry configuration for failed requests. Keyword-only;
            defaults to ``gapic_v1.method.DEFAULT``.
        timeout: Timeout for the request. Keyword-only; defaults to
            ``gapic_v1.method.DEFAULT``.
        metadata: Request metadata as a sequence of ``(key, value)`` pairs
            whose values are ``str`` or ``bytes``. Defaults to an empty
            tuple.

    Returns:
        ListInfoTypesResponse with available info types in its
        ``info_types`` field.
    """

Usage Example

from google.cloud import dlp

client = dlp.DlpServiceClient()

# Info types are listed per location rather than per project: the parent
# has the form "locations/{location_id}", so no project ID is needed here.
parent = "locations/global"

request = dlp.ListInfoTypesRequest(parent=parent)
response = client.list_info_types(request=request)

print("Available Info Types:")
for info_type in response.info_types:
    print(f"- {info_type.name}: {info_type.display_name}")
    # Descriptions can be long; show only the first 100 characters.
    if info_type.description:
        print(f"  {info_type.description[:100]}...")

Types

Request Types

class InspectContentRequest:
    """Request to inspect content for sensitive information.

    Passed as the ``request`` argument of ``inspect_content``.
    """

    # Resource the request is issued against; the usage example passes
    # "projects/{project_id}/locations/global".
    parent: str
    # Detection settings: which info types to look for and how to report them.
    inspect_config: InspectConfig
    # The content to inspect.
    item: ContentItem
    # NOTE(review): presumably the name of a stored inspection template to
    # apply; not exercised by the usage example - confirm.
    inspect_template_name: str
    # NOTE(review): location identifier; its relation to `parent` is not
    # shown here - confirm before relying on it.
    location_id: str

class DeidentifyContentRequest:
    """Request to de-identify sensitive content.

    Passed as the ``request`` argument of ``deidentify_content``.
    """

    # Resource the request is issued against; the usage example passes
    # "projects/{project_id}/locations/global".
    parent: str
    # Transformations to apply to the content.
    deidentify_config: DeidentifyConfig
    # Detection settings; the usage example leaves this unset.
    inspect_config: InspectConfig
    # The content to de-identify.
    item: ContentItem
    # NOTE(review): presumably the name of a stored inspection template to
    # use alongside/instead of `inspect_config` - confirm.
    inspect_template_name: str
    # NOTE(review): presumably the name of a stored de-identification
    # template to use alongside/instead of `deidentify_config` - confirm.
    deidentify_template_name: str
    # NOTE(review): location identifier; its relation to `parent` is not
    # shown here - confirm before relying on it.
    location_id: str

class ReidentifyContentRequest:
    """Request to re-identify previously de-identified content.

    Passed as the ``request`` argument of ``reidentify_content``.
    """

    # Resource the request is issued against.
    parent: str
    # Settings for the re-identification; note that this reuses the
    # DeidentifyConfig type rather than a dedicated one.
    reidentify_config: DeidentifyConfig
    # Detection settings used while re-identifying.
    inspect_config: InspectConfig
    # The de-identified content to restore.
    item: ContentItem
    # NOTE(review): presumably the name of a stored inspection template to
    # use alongside/instead of `inspect_config` - confirm.
    inspect_template_name: str
    # NOTE(review): presumably the name of a stored template to use
    # alongside/instead of `reidentify_config` - confirm.
    reidentify_template_name: str
    # NOTE(review): location identifier; its relation to `parent` is not
    # shown here - confirm before relying on it.
    location_id: str

class RedactImageRequest:
    """Request to redact sensitive information from images.

    Passed as the ``request`` argument of ``redact_image``.
    """

    # Resource the request is issued against; the usage example passes
    # "projects/{project_id}/locations/global".
    parent: str
    # NOTE(review): location identifier; its relation to `parent` is not
    # shown here - confirm before relying on it.
    location_id: str
    # Detection settings: which info types to look for in the image.
    inspect_config: InspectConfig
    # Redaction settings; entries are instances of the nested
    # RedactImageRequest.ImageRedactionConfig type.
    image_redaction_configs: Sequence[ImageRedactionConfig]
    # NOTE(review): presumably controls whether findings are returned in the
    # response in addition to the redacted image - confirm.
    include_findings: bool
    # The image to redact, as raw bytes plus a declared byte type.
    byte_item: ByteContentItem

class ListInfoTypesRequest:
    """Request to list available information types.

    Passed as the ``request`` argument of ``list_info_types``.
    """

    # Parent resource name; ``list_info_types`` documents the format as
    # locations/{location_id}.
    parent: str
    # NOTE(review): presumably the language used for the returned display
    # names and descriptions - confirm.
    language_code: str
    # NOTE(review): filter expression restricting the returned info types;
    # the accepted syntax is not shown here - confirm.
    filter: str
    # NOTE(review): location identifier; its relation to `parent` is not
    # shown here - confirm before relying on it.
    location_id: str

Response Types

class InspectContentResponse:
    """Response from content inspection.

    Returned by ``inspect_content``.
    """

    # Inspection outcome; the individual findings are read from
    # ``result.findings``.
    result: InspectResult

class DeidentifyContentResponse:
    """Response from content de-identification.

    Returned by ``deidentify_content``.
    """

    # The transformed content; for text input the result is in ``item.value``.
    item: ContentItem
    # NOTE(review): presumably a summary of the transformations that were
    # applied - confirm.
    overview: TransformationOverview

class ReidentifyContentResponse:
    """Response from content re-identification.

    Returned by ``reidentify_content``.
    """

    # The content with the original values restored.
    item: ContentItem
    # NOTE(review): presumably a summary of the transformations that were
    # applied - confirm.
    overview: TransformationOverview

class RedactImageResponse:
    """Response from image redaction.

    Returned by ``redact_image``.
    """

    # The redacted image as raw bytes, ready to be written to a file.
    redacted_image: bytes
    # NOTE(review): presumably the text recognized in the image - confirm
    # when it is populated.
    extracted_text: str
    # NOTE(review): presumably the findings behind the redaction; likely
    # tied to the request's `include_findings` flag - confirm.
    inspect_result: InspectResult

class ListInfoTypesResponse:
    """Response listing available information types.

    Returned by ``list_info_types``.
    """

    # One entry per info type; the usage example reads ``name``,
    # ``display_name`` and ``description`` from each entry.
    info_types: Sequence[InfoTypeDescription]

Configuration Types

class InspectConfig:
    """Configuration for content inspection.

    Accepted by the inspect, de-identify, re-identify and image-redaction
    request types through their ``inspect_config`` field.
    """

    # Info types to detect, e.g. InfoType(name="EMAIL_ADDRESS").
    info_types: Sequence[InfoType]
    # Minimum likelihood setting for findings, e.g. Likelihood.POSSIBLE.
    min_likelihood: Likelihood
    # NOTE(review): presumably caps on the number of findings returned -
    # confirm.
    limits: FindingLimits
    # When True, each finding carries the matched text in its ``quote`` field.
    include_quote: bool
    # NOTE(review): presumably omits the info type from returned findings -
    # confirm the exact effect.
    exclude_info_types: bool
    # Custom detectors defined by the caller, in addition to `info_types`.
    custom_info_types: Sequence[CustomInfoType]
    # NOTE(review): presumably selects which kinds of content are scanned -
    # confirm.
    content_options: Sequence[ContentOption]
    # NOTE(review): presumably rule sets that refine detection for specific
    # info types - confirm.
    rule_set: Sequence[InspectionRuleSet]

class DeidentifyConfig:
    """Configuration for de-identification transformations.

    Used as ``deidentify_config`` on DeidentifyContentRequest and as
    ``reidentify_config`` on ReidentifyContentRequest.
    """

    # Transformations keyed by info type; the usage example masks
    # EMAIL_ADDRESS findings through this field.
    info_type_transformations: InfoTypeTransformations
    # NOTE(review): presumably transformations for structured (table/record)
    # content, as an alternative to `info_type_transformations` - confirm.
    record_transformations: RecordTransformations
    # NOTE(review): presumably controls what happens when a transformation
    # fails - confirm.
    transformation_error_handling: TransformationErrorHandling

class ContentItem:
    """Container for content to be processed.

    NOTE(review): ``value``, ``table`` and ``byte_item`` look like
    alternative ways to supply the data; confirm whether only one of them
    may be set on a given item.
    """

    # Plain-text content, e.g. ContentItem(value="Contact support at ...").
    value: str
    # Structured, tabular content.
    table: Table
    # Binary content, expressed as a ByteContentItem.
    byte_item: ByteContentItem

Install with Tessl CLI