Style checker for Sphinx (or other) RST documentation
npx @tessl/cli install tessl/pypi-doc8@2.0.0

An opinionated style checker for reStructuredText (RST) documentation that helps developers maintain consistent formatting and style in their documentation. doc8 validates RST format, enforces line length limits with intelligent exceptions for URLs and literal blocks, prevents trailing whitespace and tab indentation, ensures Unix newlines, and checks for proper file endings.
pip install doc8

import doc8

For programmatic use:
from doc8 import doc8

For accessing version information:
from doc8 import __version__

# Check all documentation files in current directory
doc8
# Check specific directory
doc8 my-project/docs
# Check with specific options
doc8 --max-line-length=99 --ignore=D001 docs/
# Check with configuration file
doc8 --config=my-doc8.ini docs/

from doc8 import doc8
# Basic validation with default settings
result = doc8()
# Custom validation with options
result = doc8(
paths=['docs/'],
max_line_length=99,
ignore=['D001'],
allow_long_titles=True
)
# Check results
print(f"Total errors: {result.total_errors}")
print(f"Files checked: {result.files_selected}")
print(f"Files ignored: {result.files_ignored}")
# Get detailed report
print(result.report())
# Access individual errors
for check_name, filename, line_num, code, message in result.errors:
    print(f"{filename}:{line_num}: {code} {message}")

doc8 follows a modular check-based architecture:
Primary interface for doc8 validation functionality, supporting both programmatic and command-line usage with extensive configuration options.
def doc8(args=None, **kwargs):
"""
Execute doc8 validation on documentation files.
Args:
args (dict, optional): Configuration arguments dictionary
**kwargs: Configuration options as keyword arguments
Returns:
Result: Validation results with error details and statistics
"""

def main():
"""
Command-line interface entry point for doc8.
Returns:
int: Exit code (0 for success, 1 for errors)
"""

Functions for loading and managing doc8 configuration from files and command-line arguments.
def get_defaults():
"""
Get default configuration settings.
Returns:
dict: Default configuration dictionary
"""

def extract_config(args):
"""
Extract configuration from INI or TOML files.
Args:
args (dict): Command-line arguments containing config file paths
Returns:
dict: Configuration dictionary parsed from files
"""

def from_ini(fp):
"""
Parse doc8 configuration from INI file.
Args:
fp (str): File path to INI configuration file
Returns:
dict: Configuration dictionary
"""

def from_toml(fp):
"""
Parse doc8 configuration from TOML file.
Args:
fp (str): File path to TOML configuration file
Returns:
dict: Configuration dictionary
"""

Functions for discovering, scanning, and parsing documentation files for validation.
def scan(cfg):
"""
Scan directories for documentation files to validate.
Args:
cfg (dict): Configuration dictionary with paths, extensions, and ignore patterns
Returns:
tuple: (list of ParsedFile objects, count of ignored files)
"""

def validate(cfg, files, result=None):
"""
Run validation checks on parsed documentation files.
Args:
cfg (dict): Configuration dictionary
files (list): List of ParsedFile objects to validate
result (Result, optional): Result object to populate with errors
Returns:
dict: Error counts by check name
"""

def fetch_checks(cfg):
"""
Load built-in and extension validation checks.
Args:
cfg (dict): Configuration dictionary
Returns:
list: List of check objects implementing validation interfaces
"""

Classes and functions for parsing documentation files and creating structured representations.
def parse(filename, encoding=None, default_extension=""):
"""
Parse a documentation file into a structured representation.
Args:
filename (str): Path to file to parse
encoding (str, optional): Text encoding for file reading
default_extension (str, optional): Extension to use if file has none
Returns:
ParsedFile: Parsed file object with content and metadata
"""

class ParsedFile:
"""
Parsed documentation file with RST processing capabilities.
Attributes:
FALLBACK_ENCODING (str): Default encoding when none specified ("utf-8")
"""
def __init__(self, filename, encoding=None, default_extension=""):
"""
Initialize parsed file.
Args:
filename (str): Path to the file
encoding (str, optional): Text encoding
default_extension (str, optional): Default extension if none present
"""
@property
def errors(self):
"""
RST parsing errors from restructuredtext_lint.
Returns:
list: List of parsing error objects
"""
@property
def document(self):
"""
Docutils document object for RST content.
Returns:
docutils.nodes.document: Parsed RST document
"""
@property
def lines(self):
"""
Raw byte lines from file.
Returns:
list: List of byte strings representing file lines
"""
@property
def extension(self):
"""
File extension.
Returns:
str: File extension including dot (e.g., ".rst")
"""
@property
def filename(self):
"""
File path.
Returns:
str: Absolute path to the file
"""
@property
def encoding(self):
"""
Text encoding used for file.
Returns:
str: Character encoding name
"""
@property
def raw_contents(self):
"""
Raw file contents as bytes.
Returns:
bytes: File contents
"""
@property
def contents(self):
"""
Decoded file contents as string.
Returns:
str: File contents as text
"""
def lines_iter(self, remove_trailing_newline=True):
"""
Iterate over decoded text lines.
Args:
remove_trailing_newline (bool): Whether to strip trailing newlines
Yields:
str: Text lines from file
"""

Base classes and concrete implementations for documentation validation checks.
class ContentCheck:
"""
Abstract base class for checks that operate on entire file content.
"""
def __init__(self, cfg):
"""
Initialize check with configuration.
Args:
cfg (dict): Configuration dictionary
"""
def report_iter(self, parsed_file):
"""
Generate validation results for file content.
Args:
parsed_file (ParsedFile): File to validate
Yields:
tuple: (line_number, error_code, message) for each error found
"""

class LineCheck:
"""
Abstract base class for checks that operate line-by-line.
"""
def __init__(self, cfg):
"""
Initialize check with configuration.
Args:
cfg (dict): Configuration dictionary
"""
def report_iter(self, line):
"""
Generate validation results for a single line.
Args:
line (str): Text line to validate
Yields:
tuple: (error_code, message) for each error found
"""

class CheckTrailingWhitespace(LineCheck):
"""
Check for trailing whitespace in lines.
Attributes:
REPORTS (frozenset): Error codes reported (["D002"])
"""

class CheckIndentationNoTab(LineCheck):
"""
Check for tab characters in indentation.
Attributes:
REPORTS (frozenset): Error codes reported (["D003"])
"""

class CheckCarriageReturn(ContentCheck):
"""
Check for carriage return characters in file content.
Attributes:
REPORTS (frozenset): Error codes reported (["D004"])
"""

class CheckNewlineEndOfFile(ContentCheck):
"""
Check that file ends with a newline character.
Attributes:
REPORTS (frozenset): Error codes reported (["D005"])
"""

class CheckValidity(ContentCheck):
"""
Check RST syntax validity using docutils.
Attributes:
REPORTS (frozenset): Error codes reported (["D000"])
EXT_MATCHER (re.Pattern): Regex matching .rst files
WARN_LEVELS (frozenset): Docutils warning levels to report
SPHINX_IGNORES_REGEX (list): Patterns to ignore in Sphinx mode
"""

class CheckMaxLineLength(ContentCheck):
"""
Check line length limits with RST-aware exceptions.
Attributes:
REPORTS (frozenset): Error codes reported (["D001"])
"""

Classes for handling validation results and generating reports.
class Result:
"""
Container for doc8 validation results and statistics.
"""
def __init__(self):
"""Initialize empty result."""
@property
def total_errors(self):
"""
Total number of errors found.
Returns:
int: Count of all errors
"""
def error(self, check_name, filename, line_num, code, message):
"""
Record a validation error.
Args:
check_name (str): Name of check that found error
filename (str): File path where error occurred
line_num (int): Line number of error
code (str): Error code (D000-D005)
message (str): Human-readable error description
"""
def finish(self, files_selected, files_ignored, error_counts):
"""
Finalize results with file and error statistics.
Args:
files_selected (int): Number of files processed
files_ignored (int): Number of files ignored
error_counts (dict): Error counts by check name
"""
def report(self):
"""
Generate human-readable validation report.
Returns:
str: Formatted report with error details and statistics
"""

Helper functions for file discovery, RST processing, and configuration parsing.
def find_files(paths, extensions, ignored_paths):
"""
Recursively find documentation files matching extensions.
Args:
paths (list): Root paths to search
extensions (set): File extensions to include
ignored_paths (list): Paths to ignore (supports globs)
Yields:
tuple: (filepath, is_ignorable) for each matching file
"""

def filtered_traverse(document, filter_func):
"""
Traverse docutils document tree with filtering.
Args:
document (docutils.nodes.document): RST document to traverse
filter_func (callable): Function to filter nodes
Yields:
docutils.nodes.Node: Filtered document nodes
"""

def contains_url(line):
"""
Check if text line contains HTTP or HTTPS URLs.
Args:
line (str): Text line to check
Returns:
bool: True if line contains URLs
"""

def has_any_node_type(node, node_types):
"""
Check if node or its ancestors match given types.
Args:
node (docutils.nodes.Node): Node to check
node_types (tuple): Node types to match against
Returns:
bool: True if node or ancestor matches types
"""

def split_set_type(text, delimiter=","):
"""
Parse delimited text into set of values.
Args:
text (str): Delimited string to parse
delimiter (str): Delimiter character
Returns:
set: Set of parsed string values
"""

def merge_sets(sets):
"""
Merge multiple sets into single set.
Args:
sets (iterable): Sets to merge
Returns:
set: Combined set containing all values
"""

def parse_ignore_path_errors(entries):
"""
Parse path-specific error ignore patterns.
Args:
entries (list): List of "path;error_code;error_code" entries
Returns:
dict: Mapping of paths to sets of ignored error codes
"""

def setup_logging(verbose):
"""
Configure logging output based on verbosity setting.
Args:
verbose (bool): Enable verbose logging output
"""

# Configuration dictionary keys and their types
ConfigDict = {
"paths": list, # Paths to scan for files
"config": list, # Configuration file paths
"allow_long_titles": bool, # Allow long section titles
"ignore": set, # Error codes to ignore
"sphinx": bool, # Enable Sphinx-specific ignores
"ignore_path": list, # Paths to ignore (globs supported)
"ignore_path_errors": dict, # Path-specific error ignores
"default_extension": str, # Default file extension
"file_encoding": str, # Text encoding for files
"max_line_length": int, # Maximum line length
"extension": list, # File extensions to check
"quiet": bool, # Suppress non-error output
"verbose": bool, # Enable verbose output
"version": bool, # Show version and exit
}

# Validation error codes
ERROR_CODES = {
"D000": "Invalid RST format",
"D001": "Line too long",
"D002": "Trailing whitespace",
"D003": "Tab indentation",
"D004": "Carriage returns (non-Unix newlines)",
"D005": "No newline at end of file",
}

# Default file extensions to check
FILE_PATTERNS = [".rst", ".txt"]
# Default maximum line length
MAX_LINE_LENGTH = 79
# Configuration file search order
CONFIG_FILENAMES = [
"doc8.ini",
".config/doc8.ini",
"tox.ini",
"pep8.ini",
"setup.cfg",
"pyproject.toml",
]

doc8 automatically searches for configuration in these files:

- doc8.ini
- .config/doc8.ini
- tox.ini
- pep8.ini
- setup.cfg
- pyproject.toml

[doc8]
max-line-length = 99
ignore = D001,D002
ignore-path = build/,temp/
ignore-path-errors = docs/legacy.rst;D001;D002
allow-long-titles = true
verbose = false
sphinx = true
file-encoding = utf-8
default-extension = .rst
extensions = .rst,.txt

[tool.doc8]
max-line-length = 99
ignore = ["D001", "D002"]
ignore-path = ["build/", "temp/"]
ignore-path-errors = ["docs/legacy.rst;D001;D002"]
allow-long-titles = true
verbose = false
sphinx = true
file-encoding = "utf-8"
default-extension = ".rst"
extensions = [".rst", ".txt"]

doc8 supports custom validation checks through the stevedore plugin system:
- Entry point namespace: doc8.extension.check
- Base classes: ContentCheck or LineCheck

from doc8.checks import LineCheck
class MyCustomCheck(LineCheck):
REPORTS = frozenset(["C001"])
def __init__(self, cfg):
super().__init__(cfg)
def report_iter(self, line):
if "bad_pattern" in line:
yield ("C001", "Found bad pattern")
# Register in setup.py or pyproject.toml:
# [project.entry-points."doc8.extension.check"]
# my_check = "mypackage.checks:MyCustomCheck"