Google Cloud Data Loss Prevention (DLP) API client library for discovering, classifying, and protecting sensitive data
npx @tessl/cli install tessl/pypi-google-cloud-dlp@3.31.0

Google Cloud Data Loss Prevention (DLP) API enables organizations to discover, classify, and protect sensitive data across their cloud and hybrid environments. It provides comprehensive content inspection, data transformation, risk analysis, and automated data discovery capabilities with extensive configuration options for compliance and privacy requirements.
pip install google-cloud-dlp

from google.cloud import dlp

For direct access to the v2 API:
from google.cloud import dlp_v2

from google.cloud import dlp
# Project that owns the request; replace the placeholder with your own
# Google Cloud project ID before running the example.
project_id = "your-project-id"

# Initialize the DLP client
client = dlp.DlpServiceClient()

# Basic content inspection
parent = f"projects/{project_id}/locations/global"
content_item = dlp.ContentItem(value="My SSN is 123-45-6789")

# Configure inspection. include_quote is enabled because the example prints
# finding.quote below; without it the quote is not returned with the finding.
inspect_config = dlp.InspectConfig(
    info_types=[dlp.InfoType(name="US_SOCIAL_SECURITY_NUMBER")],
    include_quote=True,
)

# Create request
request = dlp.InspectContentRequest(
    parent=parent,
    inspect_config=inspect_config,
    item=content_item,
)

# Inspect content
response = client.inspect_content(request=request)
# Process findings
for finding in response.result.findings:
    print(f"Found {finding.info_type.name}: {finding.quote}")

The Google Cloud DLP API follows a service-oriented architecture with distinct functional areas:
The API supports both immediate operations for small datasets and batch processing for enterprise-scale data protection workflows.
Real-time analysis of text and images to detect, redact, and transform sensitive information. Supports immediate inspection with customizable info types and confidence levels.
def inspect_content(
    request: dlp.InspectContentRequest,
    *,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.InspectContentResponse:
    """Inspect the content carried by ``request`` for sensitive information.

    Signature outline for the real-time inspection call. The response exposes
    what was detected under ``response.result.findings``.

    Args:
        request: The inspection request to send.
        retry: Keyword-only; defaults to ``gapic_v1.method.DEFAULT``.
        timeout: Keyword-only; a float or ``gapic_v1.method.DEFAULT``.
        metadata: Keyword-only sequence of ``(key, value)`` pairs; empty by
            default.

    Returns:
        The ``InspectContentResponse`` for the request.
    """
    ...
def deidentify_content(
    request: dlp.DeidentifyContentRequest,
    *,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.DeidentifyContentResponse:
    """De-identify the content carried by ``request``.

    Signature outline for one of the real-time operations that detect,
    redact, and transform sensitive information.

    Args:
        request: The de-identification request to send.
        retry: Keyword-only; defaults to ``gapic_v1.method.DEFAULT``.
        timeout: Keyword-only; a float or ``gapic_v1.method.DEFAULT``.
        metadata: Keyword-only sequence of ``(key, value)`` pairs; empty by
            default.

    Returns:
        The ``DeidentifyContentResponse`` for the request.
    """
    ...
def redact_image(
request: dlp.RedactImageRequest,
*,
retry: OptionalRetry = gapic_v1.method.DEFAULT,
timeout: Union[float, object] = gapic_v1.method.DEFAULT,
metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.RedactImageResponse: ...

Reusable configurations for inspection and de-identification operations. Templates standardize DLP policies across an organization and simplify repeated operations.
def create_inspect_template(
    request: dlp.CreateInspectTemplateRequest,
    *,
    parent: Optional[str] = None,
    inspect_template: Optional[dlp.InspectTemplate] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.InspectTemplate:
    """Create a reusable inspection template.

    Args:
        request: The template-creation request to send.
        parent: Optional string; presumably the parent resource name, in the
            same form as ``projects/{project_id}/locations/global`` (TODO
            confirm against the library reference).
        inspect_template: Optional template definition.
        retry: Keyword-only; defaults to ``gapic_v1.method.DEFAULT``.
        timeout: Keyword-only; a float or ``gapic_v1.method.DEFAULT``.
        metadata: Keyword-only sequence of ``(key, value)`` pairs; empty by
            default.

    Returns:
        The resulting ``InspectTemplate``.
    """
    ...
def create_deidentify_template(
request: dlp.CreateDeidentifyTemplateRequest,
*,
parent: Optional[str] = None,
deidentify_template: Optional[dlp.DeidentifyTemplate] = None,
retry: OptionalRetry = gapic_v1.method.DEFAULT,
timeout: Union[float, object] = gapic_v1.method.DEFAULT,
metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.DeidentifyTemplate: ...

Long-running batch operations for processing large datasets, including scheduled triggers, hybrid content inspection, and job lifecycle management.
def create_dlp_job(
    request: dlp.CreateDlpJobRequest,
    *,
    parent: Optional[str] = None,
    inspect_job: Optional[dlp.InspectJobConfig] = None,
    risk_job: Optional[dlp.RiskAnalysisJobConfig] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.DlpJob:
    """Create a DLP job for batch processing of a dataset.

    Args:
        request: The job-creation request to send.
        parent: Optional string; presumably the parent resource name (TODO
            confirm against the library reference).
        inspect_job: Optional inspection job configuration.
        risk_job: Optional risk-analysis job configuration.
        retry: Keyword-only; defaults to ``gapic_v1.method.DEFAULT``.
        timeout: Keyword-only; a float or ``gapic_v1.method.DEFAULT``.
        metadata: Keyword-only sequence of ``(key, value)`` pairs; empty by
            default.

    Returns:
        The resulting ``DlpJob``.
    """
    ...
def create_job_trigger(
request: dlp.CreateJobTriggerRequest,
*,
parent: Optional[str] = None,
job_trigger: Optional[dlp.JobTrigger] = None,
retry: OptionalRetry = gapic_v1.method.DEFAULT,
timeout: Union[float, object] = gapic_v1.method.DEFAULT,
metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.JobTrigger: ...

Automated scanning and profiling of cloud data sources to understand data distribution, sensitivity, and compliance posture across BigQuery, Cloud Storage, Cloud SQL, and more.
def create_discovery_config(
request: dlp.CreateDiscoveryConfigRequest,
*,
parent: Optional[str] = None,
discovery_config: Optional[dlp.DiscoveryConfig] = None,
retry: OptionalRetry = gapic_v1.method.DEFAULT,
timeout: Union[float, object] = gapic_v1.method.DEFAULT,
metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.DiscoveryConfig: ...

Access to data profiles and insights generated by discovery scans, providing visibility into data sensitivity, distribution, and risk levels across projects, tables, columns, and file stores.
def get_project_data_profile(
    request: dlp.GetProjectDataProfileRequest,
    *,
    name: Optional[str] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.ProjectDataProfile:
    """Fetch the data profile of a project.

    Args:
        request: The lookup request to send.
        name: Optional string; presumably the resource name of the profile
            (TODO confirm against the library reference).
        retry: Keyword-only; defaults to ``gapic_v1.method.DEFAULT``.
        timeout: Keyword-only; a float or ``gapic_v1.method.DEFAULT``.
        metadata: Keyword-only sequence of ``(key, value)`` pairs; empty by
            default.

    Returns:
        The requested ``ProjectDataProfile``.
    """
    ...
def get_table_data_profile(
    request: dlp.GetTableDataProfileRequest,
    *,
    name: Optional[str] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.TableDataProfile:
    """Fetch the data profile of a table.

    Args:
        request: The lookup request to send.
        name: Optional string; presumably the resource name of the profile
            (TODO confirm against the library reference).
        retry: Keyword-only; defaults to ``gapic_v1.method.DEFAULT``.
        timeout: Keyword-only; a float or ``gapic_v1.method.DEFAULT``.
        metadata: Keyword-only sequence of ``(key, value)`` pairs; empty by
            default.

    Returns:
        The requested ``TableDataProfile``.
    """
    ...
def get_column_data_profile(
    request: dlp.GetColumnDataProfileRequest,
    *,
    name: Optional[str] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.ColumnDataProfile:
    """Fetch the data profile of a column.

    Args:
        request: The lookup request to send.
        name: Optional string; presumably the resource name of the profile
            (TODO confirm against the library reference).
        retry: Keyword-only; defaults to ``gapic_v1.method.DEFAULT``.
        timeout: Keyword-only; a float or ``gapic_v1.method.DEFAULT``.
        metadata: Keyword-only sequence of ``(key, value)`` pairs; empty by
            default.

    Returns:
        The requested ``ColumnDataProfile``.
    """
    ...
def get_file_store_data_profile(
request: dlp.GetFileStoreDataProfileRequest,
*,
name: Optional[str] = None,
retry: OptionalRetry = gapic_v1.method.DEFAULT,
timeout: Union[float, object] = gapic_v1.method.DEFAULT,
metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.FileStoreDataProfile: ...

Custom sensitive information detection patterns for organization-specific data types. Extends built-in detectors with custom dictionaries, regular expressions, and machine learning models.
def create_stored_info_type(
request: dlp.CreateStoredInfoTypeRequest,
*,
parent: Optional[str] = None,
config: Optional[dlp.StoredInfoTypeConfig] = None,
retry: OptionalRetry = gapic_v1.method.DEFAULT,
timeout: Union[float, object] = gapic_v1.method.DEFAULT,
metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.StoredInfoType: ...

External data source connections for accessing data outside Google Cloud, including database connections, cloud storage from other providers, and hybrid environments.
def create_connection(
    request: dlp.CreateConnectionRequest,
    *,
    parent: Optional[str] = None,
    connection: Optional[dlp.Connection] = None,
    retry: OptionalRetry = gapic_v1.method.DEFAULT,
    timeout: Union[float, object] = gapic_v1.method.DEFAULT,
    metadata: Sequence[Tuple[str, Union[str, bytes]]] = (),
) -> dlp.Connection:
    """Create a connection to an external data source.

    Args:
        request: The connection-creation request to send.
        parent: Optional string; presumably the parent resource name (TODO
            confirm against the library reference).
        connection: Optional connection definition.
        retry: Keyword-only; defaults to ``gapic_v1.method.DEFAULT``.
        timeout: Keyword-only; a float or ``gapic_v1.method.DEFAULT``.
        metadata: Keyword-only sequence of ``(key, value)`` pairs; empty by
            default.

    Returns:
        The resulting ``Connection``.
    """
    ...


class DlpServiceClient:
    """Synchronous client for Google Cloud DLP service operations."""

    def __init__(
        self,
        *,
        credentials: Optional[ga_credentials.Credentials] = None,
        transport: Optional[DlpServiceTransport] = None,
        client_options: Optional[ClientOptions] = None,
        client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO,
    ) -> None:
        """Build a client; every argument is keyword-only and optional.

        Args:
            credentials: Optional credentials object; ``None`` by default.
            transport: Optional transport instance; ``None`` by default.
            client_options: Optional client options; ``None`` by default.
            client_info: Client info; defaults to ``DEFAULT_CLIENT_INFO``.
        """
        ...
class DlpServiceAsyncClient:
    """Asynchronous client for Google Cloud DLP service operations."""

    def __init__(
        self,
        *,
        credentials: Optional[ga_credentials.Credentials] = None,
        transport: Optional[DlpServiceAsyncTransport] = None,
        client_options: Optional[ClientOptions] = None,
        client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO,
    ) -> None:
        """Build an async client; every argument is keyword-only and optional.

        Args:
            credentials: Optional credentials object; ``None`` by default.
            transport: Optional async transport instance; ``None`` by default.
            client_options: Optional client options; ``None`` by default.
            client_info: Client info; defaults to ``DEFAULT_CLIENT_INFO``.
        """
        ...


class ContentItem:
    """Container for content to be inspected."""

    # NOTE(review): presumably only one of the three fields below is set for
    # a given item (the quickstart sets only ``value``) -- confirm.
    value: str  # plain-text content, e.g. ContentItem(value="...")
    table: Table  # tabular content
    byte_item: ByteContentItem  # raw byte content
class InfoType:
    """Type of information detector."""

    name: str  # detector name, e.g. "US_SOCIAL_SECURITY_NUMBER"
    version: str
    sensitivity_score: SensitivityScore
class Finding:
    """Detected sensitive information."""

    info_type: InfoType  # detector that produced this finding
    likelihood: Likelihood  # detection confidence level
    location: Location
    quote: str  # text associated with the finding; printed in the quickstart
    quote_info: QuoteInfo
class InspectConfig:
    """Configuration for content inspection."""

    info_types: Sequence[InfoType]  # detectors to run
    min_likelihood: Likelihood
    limits: InspectConfig.FindingLimits
    include_quote: bool
    exclude_info_types: bool


class DeidentifyConfig:
    """Configuration for content de-identification."""

    info_type_transformations: InfoTypeTransformations
    record_transformations: RecordTransformations
    transformation_error_handling: TransformationErrorHandling
class PrimitiveTransformation:
    """Basic data transformation operations."""

    # NOTE(review): presumably exactly one of these configs is populated per
    # transformation -- confirm against the library reference.
    replace_config: ReplaceValueConfig
    redact_config: RedactConfig
    character_mask_config: CharacterMaskConfig
    crypto_replace_ffx_fpe_config: CryptoReplaceFfxFpeConfig
    fixed_size_bucketing_config: FixedSizeBucketingConfig
    bucketing_config: BucketingConfig
    replace_dictionary_config: ReplaceDictionaryConfig
    time_part_config: TimePartConfig
    crypto_hash_config: CryptoHashConfig
    date_shift_config: DateShiftConfig
    crypto_deterministic_config: CryptoDeterministicConfig


class Likelihood(proto.Enum):
    """Likelihood levels for detection confidence."""

    # Values rise from least to most likely; 0 marks an unspecified level.
    LIKELIHOOD_UNSPECIFIED = 0
    VERY_UNLIKELY = 1
    UNLIKELY = 2
    POSSIBLE = 3
    LIKELY = 4
    VERY_LIKELY = 5
class FileType(proto.Enum):
"""Supported file types for processing."""
FILE_TYPE_UNSPECIFIED = 0
BINARY_FILE = 1
TEXT_FILE = 2
IMAGE = 3
WORD = 5
PDF = 6
AVRO = 7
CSV = 8
TSV = 9
POWERPOINT = 11
EXCEL = 12