Formats docstrings to follow PEP 257 conventions with support for various docstring styles and Black formatter compatibility
—
File encoding detection, line ending handling, and file opening utilities for robust text file processing across different encodings and platforms. This module ensures docformatter can handle files with various encodings and line ending conventions.
The main class for handling file encoding detection and file I/O operations.
class Encoder:
    """Encoding and decoding of files.

    Handles file-encoding detection, predominant line-ending detection,
    and opening files with the detected encoding.
    """

    # Line ending constants
    CR = "\r"      # Carriage return (Mac classic)
    LF = "\n"      # Line feed (Unix/Linux)
    CRLF = "\r\n"  # Carriage return + Line feed (Windows)

    # Default/fallback encoding; latin-1 can decode any byte sequence.
    DEFAULT_ENCODING = "latin-1"

    def __init__(self):
        """Initialize an Encoder instance.

        Sets up encoding detection with the default fallback encoding
        and records the system's preferred encoding.
        """
        import locale

        # Current detected/set file encoding; updated by do_detect_encoding().
        self.encoding = self.DEFAULT_ENCODING
        # System preferred encoding.
        self.system_encoding = (
            locale.getpreferredencoding(False) or self.DEFAULT_ENCODING
        )

    # --- Methods for detecting and working with file encodings. ---

    def do_detect_encoding(self, filename) -> None:
        """Detect and set the encoding for a file.

        NOTE(review): upstream docformatter uses the charset_normalizer
        library for high-accuracy detection; this stdlib-only fallback
        tries UTF-8 first and falls back to DEFAULT_ENCODING -- confirm
        against the real package before relying on exotic encodings.

        Args:
            filename (str): Path to file for encoding detection.

        Side Effects:
            Sets self.encoding to the detected encoding.

        Raises:
            OSError: If the file cannot be read (e.g. FileNotFoundError,
                PermissionError) -- propagated to the caller.
        """
        with open(filename, "rb") as stream:
            data = stream.read()
        try:
            data.decode("utf-8")
            self.encoding = "utf-8"
        except UnicodeDecodeError:
            # Not valid UTF-8: fall back to the permissive default.
            self.encoding = self.DEFAULT_ENCODING

    # --- Methods for detecting and normalizing line endings. ---

    def do_find_newline(self, source) -> str:
        """Determine the predominant newline style in source lines.

        Analyzes line endings to determine whether the file uses Unix (LF),
        Windows (CRLF), or Mac classic (CR) line endings.

        Args:
            source (list[str]): Source lines with their endings intact.

        Returns:
            str: Predominant newline character(s): LF, CRLF, or CR.
                 Defaults to LF when no line endings are present.
        """
        # LF listed first so ties (and the no-endings case) resolve to LF.
        counts = {self.LF: 0, self.CRLF: 0, self.CR: 0}
        for line in source:
            # Test CRLF before LF: a CRLF-terminated line also ends with LF.
            if line.endswith(self.CRLF):
                counts[self.CRLF] += 1
            elif line.endswith(self.CR):
                counts[self.CR] += 1
            elif line.endswith(self.LF):
                counts[self.LF] += 1
        return max(counts, key=counts.get)

    # --- Methods for opening files with proper encoding handling. ---

    def do_open_with_encoding(self, filename, mode: str = "r"):
        """Open file with the detected encoding.

        Opens the file using the encoding set by do_detect_encoding()
        (or the default).  newline="" preserves original line endings so
        do_find_newline() can inspect them after readlines().

        Args:
            filename (str): Path to file to open.
            mode (str): File opening mode (default: "r").

        Returns:
            File object opened with the proper encoding.

        Raises:
            IOError: If the file cannot be opened.
            UnicodeDecodeError: If the encoding is incorrect (on read).
        """
        if "b" in mode:
            # Binary modes take no encoding argument.
            return open(filename, mode=mode)
        return open(filename, mode=mode, encoding=self.encoding, newline="")

# File discovery and processing utilities.
def find_py_files(sources, recursive, exclude=None):
    """Find Python source files in given sources.

    Generator function that yields Python files (.py extension) from the
    specified sources, with support for recursive directory traversal and
    exclusion patterns.

    Args:
        sources: Iterable of file/directory paths.
        recursive (bool): Whether to search directories recursively.
            When False, only the top level of each directory is listed.
        exclude (list, optional): fnmatch-style patterns; a path is
            skipped when any of its components matches a pattern.

    Yields:
        str: Path to each Python file found.
    """
    import fnmatch
    import os

    def _excluded(path):
        # A path is excluded when any path component matches a pattern.
        if not exclude:
            return False
        parts = os.path.abspath(path).split(os.sep)
        return any(
            fnmatch.fnmatch(part, pattern)
            for pattern in exclude
            for part in parts
        )

    def _walk_directory(directory):
        if recursive:
            for root, dirs, files in os.walk(directory):
                # Prune excluded subdirectories in place so os.walk skips them.
                dirs[:] = [
                    d for d in dirs if not _excluded(os.path.join(root, d))
                ]
                for name in sorted(files):
                    full = os.path.join(root, name)
                    if name.endswith(".py") and not _excluded(full):
                        yield full
        else:
            for name in sorted(os.listdir(directory)):
                full = os.path.join(directory, name)
                if (
                    name.endswith(".py")
                    and os.path.isfile(full)
                    and not _excluded(full)
                ):
                    yield full

    for source in sources:
        if os.path.isdir(source):
            yield from _walk_directory(source)
        elif source.endswith(".py") and not _excluded(source):
            # Explicitly listed files are yielded directly.
            yield source
def has_correct_length(length_range, start, end):
    """Check if docstring is within specified length range.

    Used with the --docstring-length option to filter docstrings by their
    line count.

    Args:
        length_range (list): [min_length, max_length] or None.
        start (int): Starting line number of docstring.
        end (int): Ending line number of docstring.

    Returns:
        bool: True if within range or no range specified.
    """
    if length_range is None:
        return True
    min_length, max_length = length_range
    # Inclusive line count: a docstring on lines 10-15 is 6 lines long.
    docstring_length = end + 1 - start
    return min_length <= docstring_length <= max_length
def is_in_range(line_range, start, end):
    """Check if docstring is within specified line range.

    Used with the --range option to process only docstrings within
    specific line numbers.

    NOTE(review): implemented as an overlap test (any docstring line
    inside the range) -- confirm against upstream docformatter, which
    uses the same overlap semantics rather than full containment.

    Args:
        line_range (list): [start_line, end_line] or None.
        start (int): Starting line number of docstring.
        end (int): Ending line number of docstring.

    Returns:
        bool: True if in range or no range specified.
    """
    if line_range is None:
        return True
    return any(
        line_range[0] <= line_no <= line_range[1]
        for line_no in range(start, end + 1)
    )
from docformatter import Encoder

# Set up an encoder and let it sniff the file's encoding.
enc = Encoder()
enc.do_detect_encoding("example.py")
print(f"Detected encoding: {enc.encoding}")
print(f"System encoding: {enc.system_encoding}")

# Read the file back using the encoding that was just detected.
with enc.do_open_with_encoding("example.py") as stream:
    content = stream.read()
    print(f"File content length: {len(content)}")
from docformatter import Encoder

# Detect the predominant line-ending style of a file.
enc = Encoder()
enc.do_detect_encoding("mixed_endings.py")
with enc.do_open_with_encoding("mixed_endings.py") as stream:
    lines = stream.readlines()

newline_style = enc.do_find_newline(lines)
print(f"Detected line ending: {repr(newline_style)}")

# Map the result to a human-readable platform name.
# (The three styles are mutually exclusive, so branch order is free.)
if newline_style == enc.CRLF:
    print("Windows line endings")
elif newline_style == enc.CR:
    print("Mac classic line endings")
elif newline_style == enc.LF:
    print("Unix/Linux line endings")
from docformatter import Encoder

def process_python_file(filename):
    """Process a Python file with proper encoding handling.

    Args:
        filename (str): Path to the Python file.

    Returns:
        dict: File metadata (encoding, line ending, counts),
        or None when processing fails.
    """
    encoder = Encoder()
    try:
        # Detect encoding
        encoder.do_detect_encoding(filename)
        # Placeholder was corrupted to "(unknown)" in the page; restored.
        print(f"Processing {filename} with encoding: {encoder.encoding}")
        # Read file content
        with encoder.do_open_with_encoding(filename) as f:
            lines = f.readlines()
        # Detect line endings
        newline_style = encoder.do_find_newline(lines)
        # Process content (example: count docstrings)
        content = ''.join(lines)
        docstring_count = content.count('"""') + content.count("'''")
        return {
            'filename': filename,
            'encoding': encoder.encoding,
            'line_ending': newline_style,
            'line_count': len(lines),
            'docstring_markers': docstring_count
        }
    except Exception as e:
        print(f"Error processing {filename}: {e}")
        return None

# Example usage
result = process_python_file("example.py")
if result:
    print(f"File info: {result}")
from docformatter import find_py_files

# Find all .py files in current directory
files = list(find_py_files(["."], recursive=False))
print(f"Found {len(files)} Python files")

# Find files recursively, excluding test directories
files = list(find_py_files(
    ["."],
    recursive=True,
    exclude=["tests", "__pycache__", ".git"]
))
print(f"Found {len(files)} Python files (excluding tests)")

# Process multiple source locations
sources = ["src/", "scripts/", "tools/"]
for filename in find_py_files(sources, recursive=True):
    # Placeholder was corrupted to "(unknown)"; restored to the loop var.
    print(f"Processing: {filename}")
from docformatter import has_correct_length, is_in_range

# Check docstring length filtering
length_range = [5, 20]  # Only process docstrings 5-20 lines long
start_line = 10
end_line = 15
if has_correct_length(length_range, start_line, end_line):
    print("Docstring is within length range")

# Check line range filtering
line_range = [1, 100]  # Only process docstrings in lines 1-100
if is_in_range(line_range, start_line, end_line):
    print("Docstring is within line range")

# Example usage in file processing
def should_process_docstring(start, end, length_filter=None, line_filter=None):
    """Determine if docstring should be processed based on filters."""
    # Apply each active filter in order; the first failure wins
    # (same short-circuit behavior as chained guard clauses).
    for active_filter, predicate in (
        (length_filter, has_correct_length),
        (line_filter, is_in_range),
    ):
        if active_filter and not predicate(active_filter, start, end):
            return False
    return True

# Test with various docstrings
docstrings = [
    (5, 8),    # Lines 5-8 (4 lines)
    (10, 25),  # Lines 10-25 (16 lines)
    (50, 75),  # Lines 50-75 (26 lines)
]
for start, end in docstrings:
    should_process = should_process_docstring(
        start, end,
        length_filter=[3, 20],  # 3-20 lines
        line_filter=[1, 30],    # Lines 1-30
    )
    print(f"Docstring lines {start}-{end}: {'Process' if should_process else 'Skip'}")
from docformatter import Encoder, find_py_files

class FileProcessor:
    """Batch-process Python files with encoding and line-ending detection."""

    def __init__(self):
        self.encoder = Encoder()
        self.processed_files = []

    def process_directory(self, directory, recursive=True, exclude=None):
        """Process all Python files in directory."""
        files = find_py_files([directory], recursive, exclude)
        for filename in files:
            try:
                result = self.process_file(filename)
                if result:
                    self.processed_files.append(result)
            except Exception as e:
                # Placeholder was corrupted to "(unknown)"; restored.
                print(f"Error processing {filename}: {e}")
        return self.processed_files

    def process_file(self, filename):
        """Process individual file with encoding detection."""
        # Detect encoding
        self.encoder.do_detect_encoding(filename)
        # Read file
        with self.encoder.do_open_with_encoding(filename) as f:
            lines = f.readlines()
        # Analyze file
        newline_style = self.encoder.do_find_newline(lines)
        return {
            'filename': filename,
            'encoding': self.encoder.encoding,
            'line_ending': repr(newline_style),
            'lines': len(lines),
            'size': sum(len(line.encode(self.encoder.encoding)) for line in lines)
        }

# Usage
processor = FileProcessor()
results = processor.process_directory(
    "src/",
    recursive=True,
    exclude=["__pycache__", "*.pyc", "tests/"]
)

# Print summary
for result in results:
    print(f"{result['filename']}: {result['encoding']} encoding, "
          f"{result['lines']} lines, {result['size']} bytes")
from docformatter import Encoder

def safe_file_processing(filename):
    """Process file with comprehensive error handling.

    Args:
        filename (str): Path to the file to process.

    Returns:
        dict: {'success', 'encoding', 'content_length'} on success,
        otherwise None.
    """
    encoder = Encoder()
    try:
        # Try to detect encoding
        encoder.do_detect_encoding(filename)
        print(f"Detected encoding: {encoder.encoding}")
    except FileNotFoundError:
        # Placeholders below were corrupted to "(unknown)"; restored.
        print(f"File not found: {filename}")
        return None
    except PermissionError:
        print(f"Permission denied: {filename}")
        return None
    except Exception as e:
        # Detection failure is recoverable: fall back to the default.
        print(f"Encoding detection failed: {e}")
        print(f"Using fallback encoding: {encoder.DEFAULT_ENCODING}")
        encoder.encoding = encoder.DEFAULT_ENCODING
    try:
        # Try to open and read file
        with encoder.do_open_with_encoding(filename) as f:
            content = f.read()
        return {
            'success': True,
            'encoding': encoder.encoding,
            'content_length': len(content)
        }
    except UnicodeDecodeError as e:
        print(f"Unicode decode error: {e}")
        print("File may have mixed encodings or be binary")
        return None
    except Exception as e:
        print(f"File reading error: {e}")
        return None

# Test with various files
test_files = ["example.py", "unicode_file.py", "binary_file.so", "missing.py"]
for filename in test_files:
    result = safe_file_processing(filename)
    if result:
        print(f"Successfully processed {filename}")
    else:
        print(f"Failed to process {filename}")
The file I/O and encoding module integrates with the other docformatter components, and it handles platform-specific differences:
Install with Tessl CLI
npx tessl i tessl/pypi-docformatter