Presidio Anonymizer package - replaces analyzed text with desired values.
—
Presidio Anonymizer provides built-in transformation operators for different anonymization strategies. Operators are configurable modules that transform detected PII entities according to specific algorithms.
Replace detected PII with specified text values.
class Replace(Operator):
"""Replaces detected text with specified replacement text."""
# Configuration
OperatorConfig("replace", {
"new_value": str # Text to replace with
})

Usage Examples:
from presidio_anonymizer.entities import OperatorConfig
# Replace with placeholder
replace_config = OperatorConfig("replace", {"new_value": "[REDACTED]"})
# Replace with entity type
replace_config = OperatorConfig("replace", {"new_value": "<PERSON>"})
# Replace with custom text
replace_config = OperatorConfig("replace", {"new_value": "CONFIDENTIAL"})

Completely remove detected PII from text.
class Redact(Operator):
"""Completely removes/redacts detected text."""
# Configuration (no parameters required)
OperatorConfig("redact")

Usage Example:
# Simple redaction - removes text entirely
redact_config = OperatorConfig("redact")
# Result: "Contact at " (name completely removed)

Mask characters in detected text with a specified masking character.
class Mask(Operator):
"""Masks characters in detected text with specified masking character."""
# Configuration
OperatorConfig("mask", {
"masking_char": str, # Character to use for masking (default: "*")
"chars_to_mask": int, # Number of characters to mask (default: all)
"from_end": bool # Mask from end instead of beginning (default: False)
})

Usage Examples:
# Mask all characters
mask_all = OperatorConfig("mask", {"masking_char": "*"})
# Mask last 4 characters
mask_end = OperatorConfig("mask", {
"masking_char": "*",
"chars_to_mask": 4,
"from_end": True
})
# Mask first 3 characters
mask_start = OperatorConfig("mask", {
"masking_char": "#",
"chars_to_mask": 3,
"from_end": False
})
# Examples:
# "john@email.com" -> "john@email****" (mask_end: last 4 characters masked with "*")
# "555-1234" -> "###-1234" (mask_start: first 3 characters masked with "#")

Replace detected text with hash values.
class Hash(Operator):
"""Replaces detected text with hash value."""
# Configuration
OperatorConfig("hash", {
"hash_type": str # Hash algorithm (default: "sha256")
})

Usage Example:
# SHA256 hash (default)
hash_config = OperatorConfig("hash")
# Specific hash algorithm
hash_sha512 = OperatorConfig("hash", {"hash_type": "sha512"})
# Result: "Contact a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3 at..."

Encrypt detected text using AES encryption for reversible anonymization.
class Encrypt(Operator):
"""Encrypts detected text using AES encryption."""
# Configuration
OperatorConfig("encrypt", {
"key": str # 32-character encryption key (required)
})

Usage Example:
# AES encryption with key
encrypt_config = OperatorConfig("encrypt", {
"key": "my-secret-key-32-characters-long" # Key length must be 16, 24, or 32 characters (128/192/256-bit AES); this one is 32
})
# Result: encrypted text that can be decrypted later

Keep original text without any transformation.
class Keep(Operator):
"""Keeps original text unchanged."""
# Configuration (no parameters required)
OperatorConfig("keep")

Usage Example:
# Keep certain entity types unchanged
keep_config = OperatorConfig("keep")
# Useful for preserving certain PII types while anonymizing others
operators = {
"PERSON": OperatorConfig("replace", {"new_value": "[PERSON]"}),
"LOCATION": OperatorConfig("keep") # Keep locations unchanged
}

Create custom anonymization logic by extending the Operator base class.
class Custom(Operator):
"""Allows custom anonymization logic."""
# Configuration
OperatorConfig("custom", {
"lambda": Callable[[str], str] # Function that receives the detected text and returns the replacement string
})

Usage Example:
# Custom transformation using a lambda
# Note: "lambda" must be a callable that returns a str (not a string of source code); only pass functions you trust
custom_config = OperatorConfig("custom", {
"lambda": lambda x: x.upper().replace(' ', '_')
})
# Result: "JOHN_DOE" instead of "John Doe"

Decrypt previously encrypted text using the matching key.
class Decrypt(Operator):
"""Decrypts previously encrypted text."""
# Configuration
OperatorConfig("decrypt", {
"key": str # Must match original encryption key
})

Usage Example:
# Must use same key as encryption
decrypt_config = OperatorConfig("decrypt", {
"key": "my-secret-key-32-characters-long" # Must be identical to the key used for encryption
})

Keep text unchanged during deanonymization.
class DeanonymizeKeep(Operator):
"""Keeps text unchanged during deanonymization."""
# Configuration (no parameters required)
OperatorConfig("deanonymize_keep")

When the ahds optional dependency is installed, additional operators are available:
Uses Azure Health Data Services for surrogate key generation.
class AHDSSurrogate(Operator):
"""Azure Health Data Services surrogate key operator."""
# Available only when 'ahds' dependencies are installed

Installation:
# Install with AHDS support
pip install presidio-anonymizer[ahds]

Availability Check:
from presidio_anonymizer.operators import AHDS_AVAILABLE
if AHDS_AVAILABLE:
from presidio_anonymizer.operators import AHDSSurrogate
# Configure AHDS surrogate operator
ahds_config = OperatorConfig("ahds_surrogate", {
# Configuration parameters for AHDS integration
})
# Use in operators dictionary
operators = {
"PERSON": ahds_config,
"PHONE_NUMBER": ahds_config
}
else:
print("AHDS operators not available - install with: pip install presidio-anonymizer[ahds]")

Feature Check in Code:
# Programmatically check for AHDS availability
def get_available_operators():
"""Get list of all available operators including optional ones."""
from presidio_anonymizer.operators import AHDS_AVAILABLE
operators = ['replace', 'mask', 'hash', 'encrypt', 'redact', 'keep', 'custom']
if AHDS_AVAILABLE:
operators.append('ahds_surrogate')
return operators

Manage and extend available operators using the factory pattern.
class OperatorsFactory:
"""Factory for managing anonymization and deanonymization operators."""
def get_anonymizers(self) -> Dict[str, Type[Operator]]:
"""
Get all available anonymization operators.
Returns:
Dict[str, Type[Operator]]: Dictionary mapping operator names to operator classes
"""
def get_deanonymizers(self) -> Dict[str, Type[Operator]]:
"""
Get all available deanonymization operators.
Returns:
Dict[str, Type[Operator]]: Dictionary mapping operator names to operator classes
"""
def add_anonymize_operator(self, operator_cls: Type[Operator]) -> None:
"""
Add a new anonymization operator.
Parameters:
- operator_cls (Type[Operator]): The operator class to add
"""
def add_deanonymize_operator(self, operator_cls: Type[Operator]) -> None:
"""
Add a new deanonymization operator.
Parameters:
- operator_cls (Type[Operator]): The operator class to add
"""
def remove_anonymize_operator(self, operator_cls: Type[Operator]) -> None:
"""
Remove an anonymization operator.
Parameters:
- operator_cls (Type[Operator]): The operator class to remove
"""
def remove_deanonymize_operator(self, operator_cls: Type[Operator]) -> None:
"""
Remove a deanonymization operator.
Parameters:
- operator_cls (Type[Operator]): The operator class to remove
"""

Usage Example:
from presidio_anonymizer.operators import OperatorsFactory, Operator
# Access the factory (typically used internally by engines)
factory = OperatorsFactory()
# Get available operators
anonymizers = factory.get_anonymizers()
print(anonymizers.keys()) # ['replace', 'mask', 'hash', 'encrypt', 'redact', 'keep', 'custom']
deanonymizers = factory.get_deanonymizers()
print(deanonymizers.keys()) # ['decrypt', 'deanonymize_keep']
# Add custom operator
class MyCustomOperator(Operator):
def operate(self, text, params):
return f"CUSTOM_{text.upper()}"
factory.add_anonymize_operator(MyCustomOperator)

Extend the base Operator class to create custom transformation logic:
import codecs
from presidio_anonymizer.operators import Operator
class CustomROT13(Operator):
def operate(self, text, params):
"""Apply ROT13 transformation."""
return codecs.encode(text, 'rot13') # str.encode('rot13') raises LookupError on Python 3
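def validate(self, params):
"""Validate operator parameters."""
pass # No parameters needed for ROT13
# Note: Operator is an abstract base class; a concrete operator must also implement
# operator_name() (returning "custom_rot13" for the configuration shown below) and operator_type().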
# Register with engine
from presidio_anonymizer import AnonymizerEngine
engine = AnonymizerEngine()
engine.add_anonymizer(CustomROT13)
# Use in configuration
operators = {"PERSON": OperatorConfig("custom_rot13")}

Install with Tessl CLI
npx tessl i tessl/pypi-presidio-anonymizer