Presidio Anonymizer package - replaces analyzed text with desired values.
npx @tessl/cli install tessl/pypi-presidio-anonymizer@2.2.0
Presidio Anonymizer is a Python library for anonymizing and deanonymizing text by replacing or transforming personally identifiable information (PII) entities. It takes analyzer results containing PII locations and applies configurable anonymization operators like masking, replacement, encryption, or redaction.
pip install presidio-anonymizer
pip install presidio-anonymizer[server] (includes Flask REST API)
pip install presidio-anonymizer[ahds] (includes AHDS operators)
from presidio_anonymizer import (
AnonymizerEngine,
DeanonymizeEngine,
BatchAnonymizerEngine,
RecognizerResult,
OperatorConfig,
ConflictResolutionStrategy
)
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import RecognizerResult, OperatorConfig
# Initialize the anonymizer engine
engine = AnonymizerEngine()

# Define the text to anonymize
text = "My name is John Doe and my email is john.doe@example.com"

# Define analyzer results (typically from presidio-analyzer).
# start/end are character offsets into `text`, end exclusive:
#   text[11:19] == "John Doe"
#   text[36:56] == "john.doe@example.com"
# `text` is 56 characters long, so the e-mail span ends at 56; an end of 57
# would point past the end of the text.
analyzer_results = [
    RecognizerResult(entity_type="PERSON", start=11, end=19, score=0.9),
    RecognizerResult(entity_type="EMAIL_ADDRESS", start=36, end=56, score=0.9),
]

# Configure anonymization operators, keyed by entity type
operators = {
    "PERSON": OperatorConfig("replace", {"new_value": "[PERSON]"}),
    "EMAIL_ADDRESS": OperatorConfig(
        "mask",
        {"masking_char": "*", "chars_to_mask": 5, "from_end": True},
    ),
}

# Anonymize the text
result = engine.anonymize(
    text=text,
    analyzer_results=analyzer_results,
    operators=operators,
)
print(result.text)  # "My name is [PERSON] and my email is john.doe@exampl*****"
Presidio Anonymizer uses a modular architecture:
Primary anonymization functionality using the AnonymizerEngine. Processes text with analyzer results and applies configurable operators to transform PII entities.
class AnonymizerEngine:
def anonymize(
self,
text: str,
analyzer_results: List[RecognizerResult],
operators: Optional[Dict[str, OperatorConfig]] = None,
conflict_resolution: ConflictResolutionStrategy = ConflictResolutionStrategy.MERGE_SIMILAR_OR_CONTAINED
) -> EngineResult: ...
def get_anonymizers(self) -> List[str]: ...
def add_anonymizer(self, anonymizer_cls: Type[Operator]) -> None: ...
def remove_anonymizer(self, anonymizer_cls: Type[Operator]) -> None: ...
Reverse anonymization operations using the DeanonymizeEngine. Restores original text from anonymized content when using reversible operators like encryption.
class DeanonymizeEngine:
def deanonymize(
self,
text: str,
entities: List[OperatorResult],
operators: Dict[str, OperatorConfig]
) -> EngineResult: ...
def get_deanonymizers(self) -> List[str]: ...
def add_deanonymizer(self, deanonymizer_cls: Type[Operator]) -> None: ...
def remove_deanonymizer(self, deanonymizer_cls: Type[Operator]) -> None: ...
Batch anonymization for lists and dictionaries using the BatchAnonymizerEngine. Efficiently processes multiple texts or structured data formats.
class BatchAnonymizerEngine:
def __init__(self, anonymizer_engine: Optional[AnonymizerEngine] = None): ...
def anonymize_list(
self,
texts: List[Optional[Union[str, bool, int, float]]],
recognizer_results_list: List[List[RecognizerResult]],
**kwargs
) -> List[Union[str, Any]]: ...
def anonymize_dict(
self,
analyzer_results: Iterable[DictRecognizerResult],
**kwargs
) -> Dict[str, str]: ...
Built-in transformation operators for different anonymization strategies including replacement, masking, encryption, hashing, and redaction.
# Built-in operator classes (signature listing only: every body is elided
# with `...`). Each class below derives from Operator.
# Replacement operators
class Replace(Operator): ...
class Redact(Operator): ...
class Keep(Operator): ...
# Transformation operators
class Mask(Operator): ...
class Hash(Operator): ...
# Encryption operators
class Encrypt(Operator): ...
class Decrypt(Operator): ...
# Custom operators
class Custom(Operator): ...
Core data structures for configuration, input, and results including recognizer results, operator configurations, and engine outputs.
class RecognizerResult:
    """Signature listing for one analyzer finding handed to the engines.

    The constructor takes the entity type, the ``start``/``end`` offsets of
    the finding in the text and its ``score``. Method bodies are elided
    (``...``); only the call signatures are documented here.
    """

    def __init__(self, entity_type: str, start: int, end: int, score: float): ...

    @classmethod
    def from_json(cls, data: Dict) -> "RecognizerResult": ...
class OperatorConfig:
    """Signature listing for the configuration of a single operator.

    ``operator_name`` selects the operator (the usage example passes
    ``"replace"`` and ``"mask"``); ``params`` is an optional dict of
    operator parameters. Method bodies are elided (``...``).
    """

    # `params` may be omitted, so the annotation is spelled Optional[Dict]
    # rather than relying on an implicit Optional from the None default.
    def __init__(self, operator_name: str, params: Optional[Dict] = None): ...

    @classmethod
    def from_json(cls, params: Dict) -> "OperatorConfig": ...
class EngineResult:
text: str
items: List[OperatorResult]
from enum import Enum
from typing import Dict, List, Optional, Union, Type, Iterable, Any
class ConflictResolutionStrategy(Enum):
    """Values accepted by the ``conflict_resolution`` argument of
    ``AnonymizerEngine.anonymize``.

    ``MERGE_SIMILAR_OR_CONTAINED`` is the default shown in that signature.
    """

    MERGE_SIMILAR_OR_CONTAINED = "merge_similar_or_contained"
    REMOVE_INTERSECTIONS = "remove_intersections"
class InvalidParamError(Exception):
    """Exception raised for invalid parameters."""
# Type aliases
# NOTE(review): in this listing both names are bound to generic stand-ins
# (Enum and object). Presumably the package itself defines dedicated types
# for them -- confirm against the presidio_anonymizer sources before relying
# on these bindings.
OperatorType = Enum # Anonymize, Deanonymize
PIIEntity = object # Base entity with start/end positions