A Python docstring linter that checks arguments, returns, yields, and raises sections
—
Support for baseline files that enable gradual adoption of pydoclint in large codebases by tracking existing violations and only reporting new ones.
Functions for creating and managing baseline files that track existing violations to enable gradual adoption.
def generateBaseline(
        violationsAllFiles: dict[str, list[Violation]] | dict[str, list[str]],
        path: Path,
) -> None:
    """
    Generate baseline file based on passed violations.

    Creates a baseline file containing all current violations, allowing
    future runs to only report new violations not present in the baseline.

    Parameters:
    - violationsAllFiles: Mapping of file paths to their violations; the
      per-file lists may hold either ``Violation`` objects or plain strings
    - path: Path where baseline file should be written

    Returns:
    None

    The baseline file format:
    - Each file section starts with the file path
    - Violations are indented with 4 spaces
    - File sections are separated by 20 dashes
    """
def parseBaseline(path: Path) -> dict[str, list[str]]:
    """
    Parse existing baseline file.

    Reads and parses a baseline file created by generateBaseline,
    returning the violations organized by file path.

    Parameters:
    - path: Path to baseline file to parse

    Returns:
    dict[str, list[str]]: Mapping of file paths to violation strings

    Raises:
    FileNotFoundError: If baseline file doesn't exist
    """
def reEvaluateBaseline(
        baseline: dict[str, list[str]],
        actualViolationsInAllFiles: dict[str, list[Violation]],
) -> tuple[bool, dict[str, list[str]], dict[str, list[Violation]]]:
    """
    Compare current violations against baseline and determine changes.

    Evaluates current violations against the baseline to identify:
    - Whether baseline regeneration is needed (violations were fixed)
    - Which baseline violations are still present
    - Which violations are new (not in baseline)

    Parameters:
    - baseline: Parsed baseline violations by file
    - actualViolationsInAllFiles: Current violations found in files

    Returns:
    tuple containing, in this order:
    - bool: Whether baseline regeneration is needed
    - dict[str, list[str]]: Unfixed baseline violations still present
    - dict[str, list[Violation]]: New violations not in baseline
    """

Constants defining the baseline file format structure.
SEPARATOR: str # "--------------------\n" (20 dashes)
LEN_INDENT: int # 4 (indentation length)
ONE_SPACE: str # " " (single space)
INDENT: str # "    " (4 spaces for violation indentation)

# Step 1: Generate initial baseline from current violations
pydoclint --generate-baseline --baseline=violations-baseline.txt src/
# Step 2: Run normally - only new violations reported
pydoclint --baseline=violations-baseline.txt src/
# Step 3: Auto-regenerate baseline when violations are fixed
pydoclint --baseline=violations-baseline.txt --auto-regenerate-baseline=True src/

from pathlib import Path
from pydoclint.baseline import generateBaseline, parseBaseline, reEvaluateBaseline
from pydoclint.main import _checkPaths

# NOTE(review): `_checkPaths` is underscore-prefixed, which by Python naming
# convention marks it as non-public; confirm it is stable before relying on it.

# Check files and generate initial baseline
violations = _checkPaths(
    paths=("src/",),
    style="numpy"
)
baseline_path = Path("current-violations.txt")
generateBaseline(violations, baseline_path)
# `violations` maps each file to its list of violations, so the total is the
# sum of the per-file list lengths.
print(f"Generated baseline with {sum(len(v) for v in violations.values())} violations")

# Later: check against baseline
current_violations = _checkPaths(
    paths=("src/",),
    style="numpy"
)

# Parse existing baseline
baseline = parseBaseline(baseline_path)

# Compare current violations against baseline.
# The result is (regeneration needed?, baseline violations still present,
# violations that are new relative to the baseline).
needs_regen, unfixed_baseline, new_violations = reEvaluateBaseline(
    baseline, current_violations
)
if needs_regen:
    print("Some violations were fixed - baseline needs regeneration")
    # Rewrite the baseline with only the baseline violations still present.
    generateBaseline(unfixed_baseline, baseline_path)
print(f"New violations: {sum(len(v) for v in new_violations.values())}")

src/module.py
    15: DOC101: Docstring contains fewer arguments than in function signature.
    23: DOC201: does not have a return section in docstring
    45: DOC103: Docstring arguments are different from function arguments.
--------------------
src/utils.py
    8: DOC102: Docstring contains more arguments than in function signature.
    34: DOC105: Argument names match, but type hints in these args do not match: x
--------------------

# pyproject.toml
[tool.pydoclint]
style = "google"
baseline = "pydoclint-violations.txt"
auto-regenerate-baseline = true
exclude = "tests/|migrations/"

# Configuration automatically handles baseline
pydoclint src/ # Uses baseline from config
# Generate new baseline
pydoclint --generate-baseline src/

# Phase 1: Generate baseline for entire codebase
pydoclint --generate-baseline --baseline=phase1-baseline.txt .
# Phase 2: Fix critical violations, update baseline
pydoclint --baseline=phase1-baseline.txt . 2>&1 | grep "DOC1" > critical-violations.txt
# Fix DOC1xx violations manually
pydoclint --generate-baseline --baseline=phase2-baseline.txt .
# Phase 3: Continue incremental improvement
pydoclint --baseline=phase2-baseline.txt --auto-regenerate-baseline=True .

# Create separate baselines for different modules
pydoclint --generate-baseline --baseline=core-baseline.txt src/core/
pydoclint --generate-baseline --baseline=utils-baseline.txt src/utils/
pydoclint --generate-baseline --baseline=api-baseline.txt src/api/
# Check modules independently
pydoclint --baseline=core-baseline.txt src/core/
pydoclint --baseline=utils-baseline.txt src/utils/
pydoclint --baseline=api-baseline.txt src/api/

# .github/workflows/docstring-check.yml
name: Docstring Check
on: [push, pull_request]
jobs:
  docstring-lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'
      - name: Install pydoclint
        run: pip install pydoclint
      - name: Check docstrings against baseline
        run: |
          if [ -f pydoclint-baseline.txt ]; then
            pydoclint --baseline=pydoclint-baseline.txt src/
          else
            pydoclint src/
          fi

# Script to maintain baseline health
from pathlib import Path
from pydoclint.baseline import parseBaseline, generateBaseline, reEvaluateBaseline
from pydoclint.main import _checkPaths
def maintain_baseline(
    baseline_path: Path,
    source_paths: tuple[str, ...],
    style: str = "numpy",
) -> bool:
    """Maintain baseline by cleaning up fixed violations.

    Checks ``source_paths``, creates the baseline file if it does not exist
    yet, and otherwise rewrites it when ``reEvaluateBaseline`` reports that
    regeneration is needed.

    Parameters:
    - baseline_path: Location of the baseline file to create or update
    - source_paths: Paths passed to ``_checkPaths`` for checking
    - style: Docstring style passed to ``_checkPaths`` (defaults to
      ``"numpy"``, the value this script previously hard-coded)

    Returns:
    bool: ``True`` when there are no violations outside the baseline
    (including the first run, where every current violation has just been
    recorded in a new baseline); ``False`` when new violations were found.
    """
    # Get current violations
    current_violations = _checkPaths(source_paths, style=style)

    if not baseline_path.exists():
        print("No baseline exists, generating new one")
        generateBaseline(current_violations, baseline_path)
        # Every current violation is now in the baseline, so nothing is "new".
        # Return an explicit bool so callers never receive None on this path.
        return True

    # Parse existing baseline
    baseline = parseBaseline(baseline_path)

    # Check if baseline needs update
    needs_regen, unfixed_baseline, new_violations = reEvaluateBaseline(
        baseline, current_violations
    )

    if needs_regen:
        print(f"Updating baseline - {len(baseline)} -> {len(unfixed_baseline)} files")
        # Keep only the baseline violations that are still present.
        generateBaseline(unfixed_baseline, baseline_path)

    new_count = sum(len(v) for v in new_violations.values())
    if new_count > 0:
        print(f"Found {new_count} new violations not in baseline")
        return False

    return True
# Usage
success = maintain_baseline(Path("violations.txt"), ("src/",))

Install with Tessl CLI
npx tessl i tessl/pypi-pydoclint