Presidio Anonymizer package - replaces analyzed text with desired values.
—
Core data structures used for configuration, input, and results in presidio-anonymizer operations. These entities enable structured communication between components and provide type-safe configuration.
Represents detected PII entities with location and confidence information, typically provided by presidio-analyzer.
class RecognizerResult:
def __init__(self, entity_type: str, start: int, end: int, score: float):
"""
Initialize RecognizerResult.
Parameters:
- entity_type (str): Type of PII entity (e.g., "PERSON", "EMAIL_ADDRESS")
- start (int): Start position in text (inclusive)
- end (int): End position in text (exclusive)
- score (float): Confidence score (0.0 to 1.0)
"""
@classmethod
def from_json(cls, data: Dict) -> "RecognizerResult":
"""
Create RecognizerResult from JSON dictionary.
Parameters:
- data (Dict): Dictionary with keys: entity_type, start, end, score
Returns:
RecognizerResult: New instance from JSON data
"""
def has_conflict(self, other: "RecognizerResult") -> bool:
"""
Check if this result conflicts with another result.
Parameters:
- other (RecognizerResult): Another recognizer result to check against
Returns:
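bool: True if this result conflicts with the other, i.e. both have identical indices and this result's score is lower than or equal to the other's, or this result is contained within the other result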
"""
def intersects(self, other: "RecognizerResult") -> int:
"""
Check intersection with another result.
Parameters:
- other (RecognizerResult): Another recognizer result
Returns:
int: Number of intersecting characters (0 if no intersection)
"""
def equal_indices(self, other: "RecognizerResult") -> bool:
"""
Check if two results have the same start and end positions.
Parameters:
- other (RecognizerResult): Another recognizer result
Returns:
bool: True if indices match
"""
def contains(self, other: "RecognizerResult") -> bool:
"""
Check if this result contains another result within its boundaries.
Parameters:
- other (RecognizerResult): Another recognizer result
Returns:
bool: True if this result contains the other result
"""

Usage Examples:
from presidio_anonymizer.entities import RecognizerResult
# Create from constructor
result = RecognizerResult(
entity_type="PERSON",
start=11,
end=19,
score=0.85
)
# Create from JSON (typically used in API scenarios)
json_data = {
"entity_type": "EMAIL_ADDRESS",
"start": 25,
"end": 42,
"score": 0.95
}
result = RecognizerResult.from_json(json_data)
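# Check for conflicts (a result conflicts when it has the same indices as the
# other with a lower-or-equal score, or when it is contained in the other)
result1 = RecognizerResult("PERSON", 10, 20, 0.8)
result2 = RecognizerResult("LOCATION", 5, 25, 0.9)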
if result1.has_conflict(result2):
print("Results overlap and may need conflict resolution")

Configuration object that specifies which operator to use and its parameters for anonymizing specific entity types.
class OperatorConfig:
def __init__(self, operator_name: str, params: Dict = None):
"""
Create operator configuration.
Parameters:
- operator_name (str): Name of the operator ("replace", "mask", "encrypt", etc.)
- params (Dict, optional): Parameters for the operator
"""
@classmethod
def from_json(cls, params: Dict) -> "OperatorConfig":
"""
Create OperatorConfig from JSON dictionary.
Parameters:
- params (Dict): Dictionary with "type" key and operator parameters
Returns:
OperatorConfig: New configuration instance
"""
def __eq__(self, other: "OperatorConfig") -> bool:
"""Check equality with another OperatorConfig."""

Usage Examples:
from presidio_anonymizer.entities import OperatorConfig
# Simple operator without parameters
redact_config = OperatorConfig("redact")
# Operator with parameters
mask_config = OperatorConfig("mask", {
"masking_char": "*",
"chars_to_mask": 4,
"from_end": True
})
# Complex operator configuration
encrypt_config = OperatorConfig("encrypt", {
# AES key must be 128, 192, or 256 bits long (16, 24, or 32 bytes)
"key": "my-secret-key-32-characters-long"
})
# Create from JSON (API scenarios)
json_config = {
"type": "replace",
"new_value": "[CONFIDENTIAL]"
}
replace_config = OperatorConfig.from_json(json_config)
# Use in operators dictionary
operators = {
"PERSON": mask_config,
"EMAIL_ADDRESS": encrypt_config,
"PHONE_NUMBER": redact_config
}

Contains the results of anonymization or deanonymization operations, including the transformed text and metadata.
class EngineResult:
"""
Result of anonymization or deanonymization operation.
Attributes:
- text (str): The transformed text
- items (List[OperatorResult]): Metadata about applied transformations
"""
def __init__(self, text: str = None, items: List[OperatorResult] = None):
"""
Initialize EngineResult.
Parameters:
- text (str, optional): The transformed text
- items (List[OperatorResult], optional): Metadata about transformations, defaults to empty list
"""
def to_json(self) -> str:
"""
Convert result to JSON string.
Returns:
str: JSON representation of the result
"""
def set_text(self, text: str) -> None:
"""
Set the text attribute.
Parameters:
- text (str): The text to set
"""
def add_item(self, item: OperatorResult) -> None:
"""
Add an operator result item.
Parameters:
- item (OperatorResult): The operator result to add
"""

Usage Example:
from presidio_anonymizer import AnonymizerEngine
engine = AnonymizerEngine()
result = engine.anonymize(text, analyzer_results, operators)
print(f"Anonymized text: {result.text}")
print(f"Number of transformations: {len(result.items)}")
# Access transformation details
for item in result.items:
print(f"Entity {item.entity_type} at {item.start}-{item.end} -> '{item.text}'")
# Convert to JSON for API responses
json_result = result.to_json()

Metadata about individual operator transformations applied during anonymization.
class OperatorResult:
"""
Result of applying an operator to a PII entity.
Attributes:
- start (int): Start position of transformed text
- end (int): End position of transformed text
- entity_type (str): Type of PII entity that was transformed
- text (str): The replacement text that was inserted
- operator (str): Name of operator that was applied
"""
def __init__(self, start: int, end: int, entity_type: str, text: str = None, operator: str = None):
"""
Initialize OperatorResult.
Parameters:
- start (int): Start position of transformed text
- end (int): End position of transformed text
- entity_type (str): Type of PII entity that was transformed
- text (str, optional): The replacement text that was inserted
- operator (str, optional): Name of operator that was applied
"""
def to_dict(self) -> Dict:
"""
Convert to dictionary representation.
Returns:
Dict: Dictionary with all attributes
"""
@classmethod
def from_json(cls, json: Dict) -> "OperatorResult":
"""
Create OperatorResult from JSON dictionary.
Parameters:
- json (Dict): Dictionary with operator result data
Returns:
OperatorResult: New instance from JSON data
"""

Usage Example:
# Access operator results from engine result
result = engine.anonymize(text, analyzer_results, operators)
for op_result in result.items:
print(f"Applied {op_result.operator} to {op_result.entity_type}")
print(f"Position: {op_result.start}-{op_result.end}")
print(f"Replacement: '{op_result.text}'")

Base class for PII entities with position information.
class PIIEntity:
"""
Base class for PII entities with position information.
Attributes:
- start (int): Start position in text
- end (int): End position in text
- entity_type (str): Type of PII entity
"""
def __init__(self, start: int, end: int, entity_type: str):
"""
Initialize PIIEntity.
Parameters:
- start (int): Start position in text
- end (int): End position in text
- entity_type (str): Type of PII entity
"""

Specialized recognizer result for structured data like dictionaries and nested objects.
@dataclass
class DictRecognizerResult:
"""
Recognizer result for dictionary/structured data analysis.
This is a dataclass that represents analyzer results for structured data like dictionaries.
Attributes:
- key (str): Dictionary key or field name
- value (Union[str, List[str], dict]): The value associated with the key
- recognizer_results (Union[List[RecognizerResult], List[List[RecognizerResult]], Iterator["DictRecognizerResult"]]): Analyzer results for the value
"""

Usage Example:
from presidio_anonymizer.entities import DictRecognizerResult
# Used with BatchAnonymizerEngine for dictionary data
dict_result = DictRecognizerResult(
key="user_email",
value="john@example.com",
recognizer_results=[RecognizerResult("EMAIL_ADDRESS", 0, 16, 0.9)]
)

Enumeration defining strategies for handling overlapping PII entities.
class ConflictResolutionStrategy(Enum):
MERGE_SIMILAR_OR_CONTAINED = "merge_similar_or_contained"
REMOVE_INTERSECTIONS = "remove_intersections"

Usage Example:
from presidio_anonymizer.entities import ConflictResolutionStrategy
# Use in anonymization calls
result = engine.anonymize(
text=text,
analyzer_results=analyzer_results,
operators=operators,
conflict_resolution=ConflictResolutionStrategy.REMOVE_INTERSECTIONS
)

Custom exceptions for error handling.
class InvalidParamError(Exception):
"""Exception raised for invalid parameters."""
def __init__(self, msg: str):
"""
Initialize InvalidParamError.
Parameters:
- msg (str): Error message
"""

Usage Example:
from presidio_anonymizer.entities import InvalidParamError
try:
result = engine.anonymize(text, analyzer_results, invalid_operators)
except InvalidParamError as e:
print(f"Invalid configuration: {e}")

- RecognizerResult + OperatorConfig → Engine → EngineResult
- EngineResult contains OperatorResult items
- DictRecognizerResult for structured data
- InvalidParamError for configuration issues
- PIIEntity provides common position functionality

Most entities support JSON serialization for API integration:
# RecognizerResult from JSON
result = RecognizerResult.from_json({
"entity_type": "PERSON",
"start": 0,
"end": 8,
"score": 0.9
})
# OperatorConfig from JSON
config = OperatorConfig.from_json({
"type": "mask",
"masking_char": "*",
"chars_to_mask": 4
})
# EngineResult to JSON
json_output = engine_result.to_json()

Install with Tessl CLI:
npx tessl i tessl/pypi-presidio-anonymizer