Python-based service for detecting PII entities in text using Named Entity Recognition, regular expressions, rule-based logic, and checksums
—
Presidio Analyzer includes over 50 built-in recognizers for common PII types, organized into generic entities, country-specific identifiers, NLP-based recognizers, and third-party service integrations.
Universal PII types that apply across multiple countries and contexts.
class EmailRecognizer(PatternRecognizer):
"""
Detects email addresses using comprehensive regex patterns.
Supported entities: ["EMAIL_ADDRESS"]
Languages: Multi-language support
"""

class PhoneRecognizer(PatternRecognizer):
"""
Detects phone numbers in various international formats.
Supported entities: ["PHONE_NUMBER"]
Languages: Multi-language support with region-specific patterns
"""

class CreditCardRecognizer(PatternRecognizer):
"""
Detects credit card numbers with format validation.
Supported entities: ["CREDIT_CARD"]
Languages: Multi-language support
Features: Supports major card types (Visa, MasterCard, American Express, etc.)
"""

class UrlRecognizer(PatternRecognizer):
"""
Detects URLs and web addresses.
Supported entities: ["URL"]
Languages: Multi-language support
Features: HTTP/HTTPS, FTP, and other protocol detection
"""

class IpRecognizer(PatternRecognizer):
"""
Detects IPv4 and IPv6 addresses.
Supported entities: ["IP_ADDRESS"]
Languages: Multi-language support
Features: Validates IP address format
"""

class CryptoRecognizer(PatternRecognizer):
"""
Detects cryptocurrency wallet addresses.
Supported entities: ["CRYPTO"]
Languages: Multi-language support
Features: Bitcoin, Ethereum, and other major cryptocurrencies
"""

class IbanRecognizer(PatternRecognizer):
"""
Detects International Bank Account Numbers.
Supported entities: ["IBAN_CODE"]
Languages: Multi-language support
Features: IBAN format validation and country code verification
"""

class DateRecognizer(PatternRecognizer):
"""
Detects date patterns in various formats.
Supported entities: ["DATE_TIME"]
Languages: Multi-language support
Features: Multiple date formats (MM/DD/YYYY, DD-MM-YYYY, etc.)
"""

PII types specific to the United States.
class UsSsnRecognizer(PatternRecognizer):
"""
Detects US Social Security Numbers with validation.
Supported entities: ["US_SSN"]
Languages: ["en"]
Features: Format validation and invalid number filtering
"""

class UsLicenseRecognizer(PatternRecognizer):
"""
Detects US driver license numbers for all 50 states.
Supported entities: ["US_DRIVER_LICENSE"]
Languages: ["en"]
Features: State-specific format patterns
"""

class UsPassportRecognizer(PatternRecognizer):
"""
Detects US passport numbers.
Supported entities: ["US_PASSPORT"]
Languages: ["en"]
Features: Current and legacy passport number formats
"""

class UsBankRecognizer(PatternRecognizer):
"""
Detects US bank account numbers.
Supported entities: ["US_BANK_NUMBER"]
Languages: ["en"]
Features: Account number pattern recognition
"""

class UsItinRecognizer(PatternRecognizer):
"""
Detects US Individual Taxpayer Identification Numbers.
Supported entities: ["US_ITIN"]
Languages: ["en"]
Features: ITIN format validation
"""

class AbaRoutingRecognizer(PatternRecognizer):
"""
Detects US ABA routing numbers with checksum validation.
Supported entities: ["ABA_ROUTING_NUMBER"]
Languages: ["en"]
Features: 9-digit routing number validation
"""

class MedicalLicenseRecognizer(PatternRecognizer):
"""
Detects US medical license numbers.
Supported entities: ["MEDICAL_LICENSE"]
Languages: ["en"]
Features: State-specific medical license patterns
"""

class NhsRecognizer(PatternRecognizer):
"""
Detects UK NHS (National Health Service) numbers.
Supported entities: ["UK_NHS"]
Languages: ["en"]
Features: NHS number format validation
"""

class UkNinoRecognizer(PatternRecognizer):
"""
Detects UK National Insurance Numbers.
Supported entities: ["UK_NINO"]
Languages: ["en"]
Features: NINO format validation and invalid prefix filtering
"""

class ItFiscalCodeRecognizer(PatternRecognizer):
"""
Detects Italian fiscal codes (Codice Fiscale).
Supported entities: ["IT_FISCAL_CODE"]
Languages: ["en", "it"]
Features: Fiscal code format validation
"""
class ItDriverLicenseRecognizer(PatternRecognizer):
    """
    Detects Italian driver license numbers.

    Supported entities: ["IT_DRIVER_LICENSE"]
    Languages: ["en", "it"]
    """
class ItVatCodeRecognizer(PatternRecognizer):
    """
    Detects Italian VAT codes.

    Supported entities: ["IT_VAT_CODE"]
    Languages: ["en", "it"]
    Features: VAT code validation
    """
class ItIdentityCardRecognizer(PatternRecognizer):
    """
    Detects Italian identity card numbers.

    Supported entities: ["IT_IDENTITY_CARD"]
    Languages: ["en", "it"]
    """
class ItPassportRecognizer(PatternRecognizer):
"""
Detects Italian passport numbers.
Supported entities: ["IT_PASSPORT"]
Languages: ["en", "it"]
"""

class EsNifRecognizer(PatternRecognizer):
"""
Detects Spanish NIF (National Identity Document) numbers.
Supported entities: ["ES_NIF"]
Languages: ["en", "es"]
Features: NIF checksum validation
"""
class EsNieRecognizer(PatternRecognizer):
"""
Detects Spanish NIE (Foreign Identity Number) numbers.
Supported entities: ["ES_NIE"]
Languages: ["en", "es"]
Features: NIE format validation
"""

class PlPeselRecognizer(PatternRecognizer):
"""
Detects Polish PESEL (Personal Identity Number) numbers.
Supported entities: ["PL_PESEL"]
Languages: ["en", "pl"]
Features: PESEL checksum validation
"""

class FiPersonalIdentityCodeRecognizer(PatternRecognizer):
"""
Detects Finnish personal identity codes.
Supported entities: ["FI_PERSONAL_IDENTITY_CODE"]
Languages: ["en", "fi"]
Features: Finnish ID format validation
"""

class AuAbnRecognizer(PatternRecognizer):
"""
Detects Australian Business Numbers (ABN).
Supported entities: ["AU_ABN"]
Languages: ["en"]
Features: ABN checksum validation
"""
class AuAcnRecognizer(PatternRecognizer):
    """
    Detects Australian Company Numbers (ACN).

    Supported entities: ["AU_ACN"]
    Languages: ["en"]
    Features: ACN format validation
    """
class AuTfnRecognizer(PatternRecognizer):
    """
    Detects Australian Tax File Numbers (TFN).

    Supported entities: ["AU_TFN"]
    Languages: ["en"]
    Features: TFN format validation
    """
class AuMedicareRecognizer(PatternRecognizer):
"""
Detects Australian Medicare numbers.
Supported entities: ["AU_MEDICARE"]
Languages: ["en"]
Features: Medicare number format validation
"""

class SgFinRecognizer(PatternRecognizer):
"""
Detects Singapore FIN (Foreign Identification Number) numbers.
Supported entities: ["SG_NRIC_FIN"]
Languages: ["en"]
Features: FIN checksum validation
"""
class SgUenRecognizer(PatternRecognizer):
"""
Detects Singapore UEN (Unique Entity Number) numbers.
Supported entities: ["SG_UEN"]
Languages: ["en"]
Features: UEN format validation
"""

class KrRrnRecognizer(PatternRecognizer):
"""
Detects Korean Resident Registration Numbers.
Supported entities: ["KR_RRN"]
Languages: ["en", "ko"]
Features: RRN format validation
"""

class InAadhaarRecognizer(PatternRecognizer):
"""
Detects Indian Aadhaar (Unique Identity) numbers.
Supported entities: ["IN_AADHAAR"]
Languages: ["en"]
Features: Aadhaar format validation
"""
class InPanRecognizer(PatternRecognizer):
    """
    Detects Indian PAN (Permanent Account Number) numbers.

    Supported entities: ["IN_PAN"]
    Languages: ["en"]
    Features: PAN format validation
    """
class InPassportRecognizer(PatternRecognizer):
    """
    Detects Indian passport numbers.

    Supported entities: ["IN_PASSPORT"]
    Languages: ["en"]
    Features: Indian passport format patterns
    """
class InVehicleRegistrationRecognizer(PatternRecognizer):
    """
    Detects Indian vehicle registration numbers.

    Supported entities: ["IN_VEHICLE_REGISTRATION"]
    Languages: ["en"]
    Features: Indian vehicle registration format patterns
    """
class InVoterRecognizer(PatternRecognizer):
"""
Detects Indian voter ID numbers.
Supported entities: ["IN_VOTER"]
Languages: ["en"]
Features: Indian voter ID format validation
"""

Recognizers that use Natural Language Processing models for entity detection.
class SpacyRecognizer(LocalRecognizer):
"""
Uses spaCy NLP models for named entity recognition.
Supported entities: ["PERSON", "LOCATION", "ORGANIZATION"] and others
Languages: Multiple languages supported by spaCy models
Features: Leverages spaCy's pre-trained NER models
"""
def __init__(
self,
supported_entities: List[str] = None,
check_label_groups: Tuple[Set, Set] = None,
supported_language: str = "en",
ner_strength: float = 0.85
): ...

class StanzaRecognizer(LocalRecognizer):
"""
Uses Stanford Stanza NLP models for named entity recognition.
Supported entities: ["PERSON", "LOCATION", "ORGANIZATION"] and others
Languages: Multiple languages supported by Stanza
Features: Stanford NLP Group's state-of-the-art NER models
"""
def __init__(
self,
supported_entities: List[str] = None,
check_label_groups: Tuple[Set, Set] = None,
supported_language: str = "en",
ner_strength: float = 0.85
): ...

class TransformersRecognizer(LocalRecognizer):
"""
Uses Hugging Face Transformers models for named entity recognition.
Supported entities: ["PERSON", "LOCATION", "ORGANIZATION"] and others
Languages: Multiple languages depending on model
Features: Supports BERT, RoBERTa, and other transformer models
"""
def __init__(
self,
model_id_or_path: str = None,
aggregation_strategy: str = "simple",
supported_entities: List[str] = None,
pipeline_kwargs: Dict = None,
model_kwargs: Dict = None
): ...

Integrations with external PII detection services.
class AzureAILanguageRecognizer(RemoteRecognizer):
"""
Integrates with Azure AI Language service for PII detection.
Supported entities: Multiple PII types supported by Azure
Languages: Multiple languages supported by Azure AI
Features: Cloud-based detection with high accuracy
"""
def __init__(
self,
endpoint: str = None,
credential: str = None,
supported_entities: List[str] = None,
supported_language: str = "en"
): ...

class AzureHealthDeidRecognizer(RemoteRecognizer):
"""
Integrates with Azure Health De-identification service.
Supported entities: Healthcare-specific PII types
Languages: ["en"]
Features: Specialized for healthcare and medical text
"""
def __init__(
self,
deid_service_name: str,
supported_entities: List[str] = None,
supported_language: str = "en"
): ...

from presidio_analyzer import AnalyzerEngine
# Create the analyzer engine (no arguments are passed, so its defaults apply).
analyzer = AnalyzerEngine()

# Sample sentence containing a US SSN and a US driver license number.
# NOTE(review): 123-45-6789 is a widely published placeholder SSN, and the
# US SSN recognizer is described as doing "invalid number filtering" --
# confirm this sample is actually reported before relying on the output.
text = "John's SSN is 123-45-6789 and his driver license is D1234567"

# Restrict the scan to the two US-specific entity types of interest.
us_entities = ["US_SSN", "US_DRIVER_LICENSE"]
results = analyzer.analyze(text=text, language="en", entities=us_entities)
for result in results:
detected_text = text[result.start:result.end]
print(f"Found {result.entity_type}: {detected_text}")

from presidio_analyzer import AnalyzerEngine
analyzer = AnalyzerEngine()

# Sample text mixing identifiers from several countries.
text = """
Contact information:
- UK: NHS 123 456 7890
- Italy: Fiscal Code RSSMRA80A01H501U
- Spain: NIF 12345678Z
- Australia: ABN 12 345 678 901
"""

# No `entities` argument is given here; only the text and its language.
results = analyzer.analyze(text=text, language="en")

# Group the matched substrings by entity type.
# `setdefault` creates the list on first sight of a type, so the explicit
# "is the key already there?" branch is not needed and the result stays a
# plain dict.
entity_groups = {}
for result in results:
    matched_text = text[result.start:result.end]
    entity_groups.setdefault(result.entity_type, []).append(matched_text)
for entity_type, detected_values in entity_groups.items():
print(f"{entity_type}: {detected_values}")

from presidio_analyzer import AnalyzerEngine
analyzer = AnalyzerEngine()

# Entity types that make up the "financial identifiers" scan.
financial_entities = ["CREDIT_CARD", "IBAN_CODE", "ABA_ROUTING_NUMBER", "US_BANK_NUMBER"]

# Sample payment details containing one value per financial entity type.
# NOTE(review): 4532-1234-5678-9012 does not satisfy the Luhn checksum; if the
# credit-card recognizer enforces it, this sample will not be reported --
# confirm, or swap in a Luhn-valid test number.
text = """
Payment details:
- Credit Card: 4532-1234-5678-9012
- IBAN: GB82 WEST 1234 5698 7654 32
- ABA Routing: 121000248
- Account: 1234567890
"""

# Scan only for the financial entity types listed above.
results = analyzer.analyze(text=text, language="en", entities=financial_entities)

print(f"Found {len(results)} financial identifiers")
for result in results:
masked_value = "X" * (result.end - result.start)
print(f"{result.entity_type}: {masked_value} (score: {result.score:.2f})")

from presidio_analyzer import AnalyzerEngine
# Analyzer for the healthcare example (constructed with no arguments).
analyzer = AnalyzerEngine()

# Sample patient record with a name, date, SSN, phone, email and license.
healthcare_text = """
Patient: John Smith (DOB: 01/15/1980)
SSN: 123-45-6789
Phone: 555-123-4567
Email: john.smith@email.com
Medical License: MD123456
"""

# Entity types considered relevant for healthcare text.
healthcare_entities = [
    "PERSON",
    "DATE_TIME",
    "US_SSN",
    "PHONE_NUMBER",
    "EMAIL_ADDRESS",
    "MEDICAL_LICENSE",
]

# Context words handed to the analyzer together with the text.
context_words = ["patient", "medical", "healthcare", "doctor"]

results = analyzer.analyze(
    text=healthcare_text,
    language="en",
    entities=healthcare_entities,
    context=context_words,
)
print(f"Healthcare PII detected: {len(results)} items")

from presidio_analyzer import AnalyzerEngine
analyzer = AnalyzerEngine()

# One sentence mixing contact details with a US SSN.
text = "Contact: john.doe@company.com, phone: 555-0123, SSN: 123-45-6789"

# Score cut-offs: strict for the sensitive pass, looser for contact details.
strict_threshold = 0.9
standard_threshold = 0.5

# Pass 1: sensitive data, keeping only very high-confidence matches.
# NOTE(review): confirm the SSN recognizer can actually reach a score of 0.9
# for this input; if it cannot, this list is always empty.
sensitive_results = analyzer.analyze(
    text=text,
    language="en",
    entities=["US_SSN"],
    score_threshold=strict_threshold,
)

# Pass 2: contact information at the standard confidence level.
contact_results = analyzer.analyze(
    text=text,
    language="en",
    entities=["EMAIL_ADDRESS", "PHONE_NUMBER"],
    score_threshold=standard_threshold,
)

print(f"High-confidence sensitive data: {len(sensitive_results)}")
print(f"Contact information: {len(contact_results)}")

Install with Tessl CLI
npx tessl i tessl/pypi-presidio-analyzer