Google Cloud Data Loss Prevention (DLP) API client library for discovering, classifying, and protecting sensitive data
—
Real-time analysis of text and images to detect, redact, and transform sensitive information. Content analysis operations process data immediately and return results synchronously, making them ideal for interactive applications and small-scale data processing.
Analyzes text content to identify sensitive information using built-in and custom detectors. Supports various content formats including plain text, structured tables, and document metadata.
def inspect_content(
    request: dlp.InspectContentRequest,
    *,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.InspectContentResponse:
    """Find potentially sensitive info in content.

    All parameters after ``request`` are keyword-only.

    Args:
        request: InspectContentRequest containing the content and the
            inspection configuration.
        retry: Retry configuration for failed requests.
        timeout: Timeout for the request.
        metadata: Request metadata, a sequence of (str, str-or-bytes) tuples.

    Returns:
        InspectContentResponse with the detected findings.
    """


# Usage example
from google.cloud import dlp
client = dlp.DlpServiceClient()
parent = f"projects/{project_id}/locations/global"
# Configure what to detect
inspect_config = dlp.InspectConfig(
info_types=[
dlp.InfoType(name="PHONE_NUMBER"),
dlp.InfoType(name="EMAIL_ADDRESS"),
dlp.InfoType(name="CREDIT_CARD_NUMBER"),
],
min_likelihood=dlp.Likelihood.POSSIBLE,
include_quote=True,
)
# Content to inspect
content_item = dlp.ContentItem(
value="Contact John at john@example.com or 555-123-4567"
)
# Create and send request
request = dlp.InspectContentRequest(
parent=parent,
inspect_config=inspect_config,
item=content_item,
)
response = client.inspect_content(request=request)
# Process findings
for finding in response.result.findings:
print(f"Found {finding.info_type.name}: {finding.quote}")
print(f"Likelihood: {finding.likelihood}")Transforms sensitive information in content using various techniques including masking, encryption, tokenization, and bucketing. Supports both reversible and irreversible transformations.
def deidentify_content(
    request: dlp.DeidentifyContentRequest,
    *,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.DeidentifyContentResponse:
    """De-identify potentially sensitive info from a ContentItem.

    All parameters after ``request`` are keyword-only.

    Args:
        request: DeidentifyContentRequest with the content and the
            transformation config.
        retry: Retry configuration for failed requests.
        timeout: Timeout for the request.
        metadata: Request metadata, a sequence of (str, str-or-bytes) tuples.

    Returns:
        DeidentifyContentResponse with the transformed content.
    """


# Usage example
from google.cloud import dlp
client = dlp.DlpServiceClient()
parent = f"projects/{project_id}/locations/global"
# Configure de-identification
deidentify_config = dlp.DeidentifyConfig(
info_type_transformations=dlp.InfoTypeTransformations(
transformations=[
dlp.InfoTypeTransformations.InfoTypeTransformation(
info_types=[dlp.InfoType(name="EMAIL_ADDRESS")],
primitive_transformation=dlp.PrimitiveTransformation(
character_mask_config=dlp.CharacterMaskConfig(
masking_character="*",
number_to_mask=5,
)
),
)
]
)
)
# Content to de-identify
content_item = dlp.ContentItem(
value="Contact support at support@example.com"
)
request = dlp.DeidentifyContentRequest(
parent=parent,
deidentify_config=deidentify_config,
item=content_item,
)
response = client.deidentify_content(request=request)
print(f"De-identified: {response.item.value}")Reverses de-identification transformations to restore original sensitive values. Only works with reversible transformations like deterministic encryption or tokenization.
def reidentify_content(
request: dlp.ReidentifyContentRequest,
*,
retry: OptionalRetry = gapic_v1.method.DEFAULT,
timeout: Union[float, object] = gapic_v1.method.DEFAULT,
metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.ReidentifyContentResponse:
"""
Re-identifies content that has been de-identified.
Args:
request: ReidentifyContentRequest with de-identified content and config
retry: Retry configuration for failed requests
timeout: Timeout for the request
metadata: Request metadata
Returns:
ReidentifyContentResponse with original content restored
"""Redacts sensitive information from images by detecting and obscuring text within image files. Supports various image formats and redaction methods.
def redact_image(
    request: dlp.RedactImageRequest,
    *,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.RedactImageResponse:
    """Redact potentially sensitive info from an image.

    All parameters after ``request`` are keyword-only.

    Args:
        request: RedactImageRequest with the image data and the redaction
            config.
        retry: Retry configuration for failed requests.
        timeout: Timeout for the request.
        metadata: Request metadata, a sequence of (str, str-or-bytes) tuples.

    Returns:
        RedactImageResponse with the redacted image.
    """


# Usage example
from google.cloud import dlp
client = dlp.DlpServiceClient()
parent = f"projects/{project_id}/locations/global"
# Read image file
with open("document.png", "rb") as f:
image_data = f.read()
# Configure redaction
inspect_config = dlp.InspectConfig(
info_types=[dlp.InfoType(name="PHONE_NUMBER")]
)
image_redaction_config = dlp.RedactImageRequest.ImageRedactionConfig(
info_type=dlp.InfoType(name="PHONE_NUMBER"),
redact_all_text=False,
redaction_color=dlp.Color(red=0.5, green=0.5, blue=0.5),
)
request = dlp.RedactImageRequest(
parent=parent,
byte_item=dlp.ByteContentItem(
type_=dlp.ByteContentItem.BytesType.IMAGE_PNG,
data=image_data,
),
inspect_config=inspect_config,
image_redaction_configs=[image_redaction_config],
)
response = client.redact_image(request=request)
# Save redacted image
with open("redacted_document.png", "wb") as f:
f.write(response.redacted_image)Lists all available sensitive information types that can be detected by the DLP API, including built-in detectors and custom stored info types.
def list_info_types(
    request: dlp.ListInfoTypesRequest,
    *,
    parent: Optional[str] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.ListInfoTypesResponse:
    """Return the sensitive information types that the DLP API supports.

    All parameters after ``request`` are keyword-only.

    Args:
        request: ListInfoTypesRequest.
        parent: Parent resource name (format: locations/{location_id}).
            Defaults to None.
        retry: Retry configuration for failed requests.
        timeout: Timeout for the request.
        metadata: Request metadata, a sequence of (str, str-or-bytes) tuples.

    Returns:
        ListInfoTypesResponse with the available info types.
    """


# Usage example
from google.cloud import dlp
# NOTE: `project_id` is not defined in this snippet; assign it before
# running, since it is interpolated into the parent resource name below.
client = dlp.DlpServiceClient()
# NOTE(review): the list_info_types docstring documents the parent format as
# locations/{location_id}; confirm this project-scoped form is accepted.
parent = f"projects/{project_id}/locations/global"
request = dlp.ListInfoTypesRequest(parent=parent)
response = client.list_info_types(request=request)

print("Available Info Types:")
for info_type in response.info_types:
    print(f"- {info_type.name}: {info_type.display_name}")
    if info_type.description:
        # Prints at most the first 100 characters of the description;
        # the trailing "..." is appended regardless of the original length.
        print(f" {info_type.description[:100]}...")


class InspectContentRequest:
    """Request to inspect content for sensitive information."""

    # Parent resource name; the inspect example passes
    # "projects/{project_id}/locations/global".
    parent: str
    # Inspection configuration (what to detect).
    inspect_config: InspectConfig
    # The content to inspect.
    item: ContentItem
    # NOTE(review): presumably the name of a stored inspect template -- confirm.
    inspect_template_name: str
    location_id: str
class DeidentifyContentRequest:
    """Request message used to de-identify sensitive content."""

    # Parent resource name; the de-identify example passes
    # "projects/{project_id}/locations/global".
    parent: str
    # Transformations to apply to the content.
    deidentify_config: DeidentifyConfig
    # Inspection configuration.
    inspect_config: InspectConfig
    # The content to de-identify.
    item: ContentItem
    # NOTE(review): the two *_template_name fields presumably name stored
    # templates -- confirm against the API reference.
    inspect_template_name: str
    deidentify_template_name: str
    location_id: str
class ReidentifyContentRequest:
    """Request message used to re-identify previously de-identified content."""

    # Parent resource name.
    parent: str
    # Declared with the DeidentifyConfig type, the same type used by
    # DeidentifyContentRequest.deidentify_config.
    reidentify_config: DeidentifyConfig
    # Inspection configuration.
    inspect_config: InspectConfig
    # The de-identified content to restore.
    item: ContentItem
    # NOTE(review): the two *_template_name fields presumably name stored
    # templates -- confirm against the API reference.
    inspect_template_name: str
    reidentify_template_name: str
    location_id: str
class RedactImageRequest:
    """Request message used to redact sensitive information from an image."""

    # Parent resource name.
    parent: str
    location_id: str
    # Inspection configuration (which info types to look for).
    inspect_config: InspectConfig
    # Per-info-type redaction settings; the usage example builds entries as
    # dlp.RedactImageRequest.ImageRedactionConfig.
    image_redaction_configs: Sequence[ImageRedactionConfig]
    # NOTE(review): presumably toggles returning findings in the response
    # -- confirm against the API reference.
    include_findings: bool
    # The image payload (bytes plus a BytesType such as IMAGE_PNG).
    byte_item: ByteContentItem
class ListInfoTypesRequest:
    """Request to list available information types."""

    # Parent resource name.
    parent: str
    language_code: str
    filter: str
    location_id: str


class InspectContentResponse:
    """Response from content inspection."""

    # Inspection result; the usage example iterates `result.findings`.
    result: InspectResult
class DeidentifyContentResponse:
    """Response message returned by content de-identification."""

    # The transformed content; the usage example prints `item.value`.
    item: ContentItem
    # NOTE(review): presumably a summary of the applied transformations
    # -- confirm against the API reference.
    overview: TransformationOverview
class ReidentifyContentResponse:
    """Response message returned by content re-identification."""

    # The content with original values restored.
    item: ContentItem
    # NOTE(review): presumably a summary of the applied transformations
    # -- confirm against the API reference.
    overview: TransformationOverview
class RedactImageResponse:
    """Response message returned by image redaction."""

    # Raw bytes of the redacted image; the usage example writes them to a
    # file opened in binary mode.
    redacted_image: bytes
    # NOTE(review): presumably the text detected in the image -- confirm.
    extracted_text: str
    inspect_result: InspectResult
class ListInfoTypesResponse:
    """Response listing available information types."""

    # One entry per info type; the usage example reads `name`,
    # `display_name` and `description` from each entry.
    info_types: Sequence[InfoTypeDescription]


class InspectConfig:
    """Configuration for content inspection."""

    # Info types to detect, e.g. InfoType(name="PHONE_NUMBER").
    info_types: Sequence[InfoType]
    # Likelihood setting; the usage example passes Likelihood.POSSIBLE.
    min_likelihood: Likelihood
    limits: FindingLimits
    # Set to True in the inspect example, which then prints `finding.quote`.
    include_quote: bool
    exclude_info_types: bool
    custom_info_types: Sequence[CustomInfoType]
    content_options: Sequence[ContentOption]
    rule_set: Sequence[InspectionRuleSet]
class DeidentifyConfig:
    """Settings describing the de-identification transformations to apply."""

    # Transformations keyed by info type; the usage example sets only this
    # field.
    info_type_transformations: InfoTypeTransformations
    record_transformations: RecordTransformations
    transformation_error_handling: TransformationErrorHandling
class ContentItem:
"""Container for content to be processed."""
value: str
table: Table
byte_item: ByteContentItemInstall with Tessl CLI
npx tessl i tessl/pypi-google-cloud-dlp