# docformatter: formats docstrings to follow PEP 257 conventions, with
# support for various docstring styles and Black-formatter compatibility.
#
# Text manipulation utilities for docstring processing: indentation
# detection, line normalization, summary formatting, and text-splitting
# operations that form the foundation of docformatter's text-processing
# capabilities.

# Functions for analyzing and working with text indentation patterns.
def find_shortest_indentation(lines: list[str]) -> str:
    """Determine the shortest indentation in a list of lines.

    Args:
        lines (list[str]): List of text lines to analyze.

    Returns:
        str: The shortest indentation string found among non-empty lines,
        or the empty string when no non-empty line exists.
    """
    shortest = None
    for line in lines:
        # Whitespace-only lines carry no meaningful indentation.
        if line.strip():
            indent = line[: len(line) - len(line.lstrip())]
            if shortest is None or len(indent) < len(shortest):
                shortest = indent
    return shortest if shortest is not None else ""


# Utilities for normalizing line endings and line content.
def normalize_line(line: str, newline: str) -> str:
    """Return line with fixed ending, if an ending was present.

    Args:
        line (str): The line to normalize.
        newline (str): The newline character(s) to use.

    Returns:
        str: Line with normalized ending; a line that had no trailing
        whitespace or newline is returned unchanged.
    """
    stripped = line.rstrip()
    # Only append the requested newline when the line actually had a
    # trailing ending (or whitespace) to replace.
    return stripped + newline if stripped != line else line
def normalize_line_endings(lines, newline):
    """Return text with normalized line endings.

    Args:
        lines: Iterable of text lines to normalize.
        newline: Newline character(s) to use.

    Returns:
        str: Text joined with consistent line endings.
    """
    normalized = []
    for line in lines:
        stripped = line.rstrip()
        # Replace whatever ending the line had (\r, \r\n, \n, or plain
        # trailing whitespace) with the requested newline.
        normalized.append(stripped + newline if stripped != line else line)
    return "".join(normalized)


# Functions for processing and formatting docstring summaries.
def normalize_summary(summary: str, noncap: Optional[List[str]] = None) -> str:
"""
Return normalized docstring summary.
Normalizes summary by capitalizing first word (unless in noncap list)
and adding period at end if missing.
Args:
summary (str): The summary string to normalize
noncap (List[str], optional): Words not to capitalize when first
Returns:
str: Normalized summary with proper capitalization and punctuation
"""Functions for detecting and working with sentence boundaries.
def is_probably_beginning_of_sentence(line: str) -> Union[Match[str], None, bool]:
"""
Determine if the line begins a sentence.
Uses heuristics to detect parameter lists and sentence beginnings
by looking for specific patterns and tokens.
Args:
line (str): The line to test
Returns:
bool: True if line probably begins a sentence
"""Functions for splitting text into components.
import re

# Trailing-period tokens that do NOT terminate a sentence.
_ABBREVIATIONS = frozenset({
    "e.g.", "i.e.", "etc.", "et al.", "cf.", "vs.",
    "mr.", "mrs.", "ms.", "dr.", "prof.",
})


def split_first_sentence(text):
    """Split text into first sentence and remainder.

    Handles common abbreviations and false sentence endings.
    Recognizes periods, question marks, and exclamation marks as
    sentence boundaries, plus colons at line endings.

    Args:
        text: Text to split.

    Returns:
        tuple: (first_sentence, remaining_text).
    """
    sentence = ""
    rest = text
    while rest:
        parts = re.split(r"(\s)", rest, maxsplit=1)
        word = parts[0]
        if len(parts) == 3:
            delimiter, rest = parts[1], parts[2]
        else:
            delimiter, rest = "", ""
        sentence += word

        ends_sentence = (
            word.endswith((".", "?", "!"))
            and word.lower() not in _ABBREVIATIONS
        ) or (word.endswith(":") and delimiter == "\n")
        if ends_sentence:
            # The whitespace delimiter stays with the remainder so the
            # input's original spacing is preserved on a re-join.
            return sentence, delimiter + rest
        sentence += delimiter
    return sentence, rest
def split_summary_and_description(contents):
    """Split docstring into summary and description parts.

    Uses empty lines, sentence boundaries, and heuristics to determine
    where the summary ends and the description begins.

    Args:
        contents: Docstring content to split.

    Returns:
        tuple: (summary, description); description is "" when the
        content is a single sentence/paragraph.
    """
    split_lines = contents.rstrip().splitlines()
    for index in range(1, len(split_lines)):
        # An empty line, or a line that looks like the start of a new
        # sentence or parameter list, ends the summary.
        if (
            not split_lines[index].strip()
            or is_probably_beginning_of_sentence(split_lines[index])
        ):
            return (
                "\n".join(split_lines[:index]).strip(),
                "\n".join(split_lines[index:]),
            )

    # No structural boundary found: fall back to splitting after the
    # first sentence.
    summary, description = split_first_sentence(contents)
    if summary.strip() and description.strip():
        return summary.strip(), description
    return contents, ""


# Example: analyzing indentation with find_shortest_indentation (above).
# Analyze the indentation of a code block using
# find_shortest_indentation (defined above in this module).
lines = [
    "    def function():",
    "        '''Docstring.",
    "    ",
    "        Description here.",
    "        '''",
    "        pass",
]
shortest = find_shortest_indentation(lines)
print(f"Shortest indentation: '{shortest}'")  # four spaces
# Normalize a single line with normalize_line (defined above).
line = "Text with mixed endings\r\n"
normalized = normalize_line(line, "\n")
print(repr(normalized))  # 'Text with mixed endings\n'

# Normalize multiple lines at once.
text_lines = ["Line 1\r\n", "Line 2\r", "Line 3\n"]
normalized_text = normalize_line_endings(text_lines, "\n")
print(repr(normalized_text))  # 'Line 1\nLine 2\nLine 3\n'
# Basic summary normalization with normalize_summary (defined above).
summary = "format docstrings according to pep 257"
normalized = normalize_summary(summary)
print(normalized)  # "Format docstrings according to pep 257."

# With a non-capitalization list: "API" keeps its spelling.
summary = "API documentation generator"
normalized = normalize_summary(summary, noncap=["API"])
print(normalized)  # "API documentation generator."

# Already properly formatted input is returned unchanged.
summary = "Process the input data."
normalized = normalize_summary(summary)
print(normalized)  # "Process the input data."
# Split off the first sentence with split_first_sentence (defined above).
text = "This is the first sentence. This is the second sentence."
first, rest = split_first_sentence(text)
print(f"First: '{first}'")  # "This is the first sentence."
print(f"Rest: '{rest}'")    # " This is the second sentence."

# Abbreviations such as "e.g." do not end the sentence.
text = "See e.g. the documentation. More info follows."
first, rest = split_first_sentence(text)
print(f"First: '{first}'")  # "See e.g. the documentation."
print(f"Rest: '{rest}'")    # " More info follows."

# Split a docstring into summary and description parts.
docstring = """Process input data.

This function processes the input data according to
the specified parameters and returns the results.

Args:
    data: Input data to process
"""
summary, description = split_summary_and_description(docstring)
print(f"Summary: '{summary}'")
print(f"Description: '{description}'")
def process_docstring(docstring_content):
    """Process a complete docstring: split, normalize, and analyze it.

    Args:
        docstring_content: Raw docstring text.

    Returns:
        tuple: (normalized_summary, description).
    """
    # Split into summary and description parts.
    summary, description = split_summary_and_description(docstring_content)

    # Normalize the summary (capitalization, trailing period).
    normalized_summary = normalize_summary(summary)

    # Analyze the description's indentation, if one is present.
    if description:
        desc_lines = description.splitlines()
        base_indent = find_shortest_indentation(desc_lines)
        print(f"Description base indentation: '{base_indent}'")

    return normalized_summary, description


# Example usage
docstring = """process the data

This function processes input data and returns
processed results.
"""
summary, desc = process_docstring(docstring)
print(f"Processed summary: '{summary}'")
# Exercise is_probably_beginning_of_sentence (defined above) on a few
# representative line shapes.
test_lines = [
    "  - Parameter: description",   # Bullet list
    "  @param name: description",   # Epytext parameter
    "  :param name: description",   # Sphinx parameter
    "  Normal sentence text",       # Regular text
    "  ) Closing parenthesis",      # Special case
]
for line in test_lines:
    is_beginning = is_probably_beginning_of_sentence(line)
    print(f"'{line.strip()}' -> {is_beginning}")
def analyze_docstring(content):
    """Analyze docstring structure and content.

    Args:
        content: Docstring text to analyze.

    Returns:
        dict: Parts of the docstring plus normalization flags.
    """
    summary, description = split_summary_and_description(content)
    print(f"Summary length: {len(summary)}")
    print(f"Has description: {bool(description.strip())}")

    # Check whether the summary needs normalization.
    normalized = normalize_summary(summary)
    needs_normalization = summary != normalized

    return {
        'summary': summary,
        'description': description,
        'normalized_summary': normalized,
        'needs_normalization': needs_normalization,
        'has_description': bool(description.strip()),
    }
def preserve_relative_indentation(lines):
    """Preserve relative indentation while normalizing the base level.

    Non-empty lines are re-indented so the least-indented line sits at
    the new base indent, keeping each line's offset from that base.

    Args:
        lines: Lines whose indentation should be rebased.

    Returns:
        list: The re-indented lines; empty lines are kept verbatim.
    """
    base_indent = find_shortest_indentation(lines)
    base_level = len(base_indent)

    processed_lines = []
    for line in lines:
        if line.strip():  # Non-empty line
            current_indent = len(line) - len(line.lstrip())
            relative_indent = current_indent - base_level
            # NOTE(review): the new base indent of four spaces is
            # reconstructed — the original's whitespace was collapsed
            # during extraction; confirm against upstream docs.
            new_line = "    " + " " * relative_indent + line.lstrip()
            processed_lines.append(new_line)
        else:
            processed_lines.append(line)
    return processed_lines


# The string processing functions integrate closely with other
# docformatter components.
# The string processing functions handle a variety of edge cases
# (empty input, whitespace-only lines, abbreviations, and mixed line
# endings).
#
# Install with the Tessl CLI:
#   npx tessl i tessl/pypi-docformatter