Type stubs for chardet - Universal encoding detector for Python
npx @tessl/cli install tessl/pypi-types-chardet@4.0.0
Type stubs for chardet - A universal character encoding detection library for Python. This package provides comprehensive type annotations for static type checking when using chardet to automatically detect the character encoding of text data from unknown sources.
pip install types-chardet
(requires the chardet runtime library: pip install chardet)
from chardet import UniversalDetector
The package also supports dynamic attribute access via __getattr__:
import chardet
# Access the main detect function dynamically
detect_func = chardet.detect # Available at runtime
Import type annotations:
from typing import Dict, Any, Optional, List, Tuple
from typing_extensions import TypedDict
from chardet import UniversalDetector
import chardet

# Simple detection using detect() function (available via __getattr__)
result = chardet.detect(b'Hello World')
# result: {'encoding': 'ascii', 'confidence': 1.0, 'language': ''}

# Advanced incremental detection with UniversalDetector
detector = UniversalDetector()
with open('unknown_encoding.txt', 'rb') as f:
    for line in f:
        detector.feed(line)
        # Stop reading as soon as the detector reports it is done.
        if detector.done:
            break
# close() returns the final result dictionary.
result = detector.close()
# result: {'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}

# Simple detection function that analyzes byte sequences to identify character encoding.
def detect(byte_str: bytes) -> _FinalResultType:
    """
    Detect character encoding of the given bytes.

    Parameters:
    - byte_str: bytes, data to analyze for encoding detection

    Returns:
    Dict with 'encoding', 'confidence', and 'language' keys
    """
    ...


# The package provides dynamic access to all chardet attributes through __getattr__. This allows access to functions like detect() and other runtime attributes that are not statically defined in the stubs.
def __getattr__(name: str) -> Any:
    """
    Provide dynamic access to any chardet module attribute.

    This mechanism enables access to runtime functions like detect() and
    other chardet functionality that is available at runtime but not
    statically defined in the type stubs.

    Parameters:
    - name: str, attribute name to access

    Returns:
    Any attribute from the chardet runtime module
    """
    ...


# The UniversalDetector class provides the core encoding detection functionality with support for incremental detection and language filtering.
class UniversalDetector:
    """
    Encoding detector that is fed bytes incrementally.

    Bytes are supplied through feed(); close() returns the final result.
    A language filter can be passed at construction time.
    """

    # Class-level constants declared by the stub.
    MINIMUM_THRESHOLD: float
    HIGH_BYTE_DETECTOR: Pattern[bytes]
    ESC_DETECTOR: Pattern[bytes]
    WIN_BYTE_DETECTOR: Pattern[bytes]
    ISO_WIN_MAP: dict[str, str]

    # Detector state attributes.
    result: _IntermediateResultType
    done: bool
    lang_filter: int
    logger: Logger

    def __init__(self, lang_filter: int = ...) -> None:
        """
        Initialize UniversalDetector.

        Parameters:
        - lang_filter: int, language filter using LanguageFilter constants
        """
        ...

    def reset(self) -> None:
        """Reset the detector to initial state."""
        ...

    def feed(self, byte_str: bytes) -> None:
        """
        Feed bytes to the detector for analysis.

        Parameters:
        - byte_str: bytes, data to analyze
        """
        ...

    def close(self) -> _FinalResultType:
        """
        Close detection and return final result.

        Returns:
        Dict with 'encoding', 'confidence', and 'language' keys
        """
        ...


# State constants for controlling and monitoring the detection process.
class InputState:
    """Input state constants used by the detection process."""

    PURE_ASCII: int
    ESC_ASCII: int
    HIGH_BYTE: int
class ProbingState:
    """Probing state constants used by the detection process."""

    DETECTING: int
    FOUND_IT: int
    NOT_ME: int
class MachineState:
    """Machine state constants used by the detection process."""

    START: int
    ERROR: int
    ITS_ME: int


# Language filter constants for focusing detection on specific language groups.
class LanguageFilter:
    """Language filter constants for focusing detection on language groups."""

    CHINESE_SIMPLIFIED: int
    CHINESE_TRADITIONAL: int
    JAPANESE: int
    KOREAN: int
    NON_CJK: int
    ALL: int
    CHINESE: int
    CJK: int


# Likelihood and character categorization constants for internal sequence analysis.
class SequenceLikelihood:
    """Likelihood constants for internal sequence analysis."""

    NEGATIVE: int
    UNLIKELY: int
    LIKELY: int
    POSITIVE: int

    @classmethod
    def get_num_categories(cls) -> int:
        """Get the number of likelihood categories."""
        ...
class CharacterCategory:
    """Character categorization constants for internal sequence analysis."""

    UNDEFINED: int
    LINE_BREAK: int
    SYMBOL: int
    DIGIT: int
    CONTROL: int


# Language-specific detection models with character mappings and precedence matrices.
# Language model declarations. Each group below declares its model objects
# (typed as _LangModelType), followed by its char-to-order maps and its
# language-model tuple (both typed as tuple[int, ...]).

# Greek language model
Latin7GreekModel: _LangModelType
Win1253GreekModel: _LangModelType
Latin7_char_to_order_map: tuple[int, ...]
win1253_char_to_order_map: tuple[int, ...]
GreekLangModel: tuple[int, ...]

# Bulgarian language model
Latin5BulgarianModel: _LangModelType
Win1251BulgarianModel: _LangModelType
Latin5_BulgarianCharToOrderMap: tuple[int, ...]
win1251BulgarianCharToOrderMap: tuple[int, ...]
BulgarianLangModel: tuple[int, ...]

# Cyrillic language models
Koi8rModel: _LangModelType
Win1251CyrillicModel: _LangModelType
Latin5CyrillicModel: _LangModelType
MacCyrillicModel: _LangModelType
Ibm866Model: _LangModelType
Ibm855Model: _LangModelType
KOI8R_char_to_order_map: tuple[int, ...]
win1251_char_to_order_map: tuple[int, ...]
latin5_char_to_order_map: tuple[int, ...]
macCyrillic_char_to_order_map: tuple[int, ...]
IBM855_char_to_order_map: tuple[int, ...]
IBM866_char_to_order_map: tuple[int, ...]
RussianLangModel: tuple[int, ...]

# Hebrew language model
Win1255HebrewModel: _LangModelType
WIN1255_CHAR_TO_ORDER_MAP: tuple[int, ...]
HEBREW_LANG_MODEL: tuple[int, ...]

# Hungarian language model
Latin2HungarianModel: _LangModelType
Win1250HungarianModel: _LangModelType
Latin2_HungarianCharToOrderMap: tuple[int, ...]
win1250HungarianCharToOrderMap: tuple[int, ...]
HungarianLangModel: tuple[int, ...]

# Thai language model
TIS620ThaiModel: _LangModelType
TIS620CharToOrderMap: tuple[int, ...]
ThaiLangModel: tuple[int, ...]

# Turkish language model
Latin5TurkishModel: _LangModelType
Latin5_TurkishCharToOrderMap: tuple[int, ...]
TurkishLangModel: tuple[int, ...]
Package version information for compatibility checking.
__version__: str
VERSION: list[str]
from typing import Pattern, Optional, List, Any
from typing_extensions import TypedDict
from logging import Logger
class _FinalResultType(TypedDict):
    """Result dictionary returned by detect() and UniversalDetector.close()."""

    encoding: str
    confidence: float
    language: str
class _IntermediateResultType(TypedDict):
encoding: str | None
confidence: float
language: str | None
class _LangModelType(TypedDict):
    """Dictionary shape used to type the language model declarations."""

    char_to_order_map: tuple[int, ...]
    precedence_matrix: tuple[int, ...]
    typical_positive_ratio: float
    keep_english_letter: bool
    charset_name: str
    language: str
class _SMModelType(TypedDict):
    """
    Dictionary shape with class, state and character-length tables.

    NOTE(review): not referenced elsewhere in this excerpt; presumably
    describes a state-machine model -- confirm against the runtime library.
    """

    class_table: tuple[int, ...]
    class_factor: int
    state_table: tuple[int, ...]
    char_len_table: tuple[int, ...]
    name: str