CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-pydoclint

A Python docstring linter that checks arguments, returns, yields, and raises sections

Pending
Overview
Eval results
Files

docs/baseline.md

Baseline Management

Support for baseline files that enable gradual adoption of pydoclint in large codebases by tracking existing violations and only reporting new ones.

Capabilities

Baseline Generation

Functions for creating and managing baseline files that track existing violations to enable gradual adoption.

def generateBaseline(
    violationsAllFiles: dict[str, list[Violation]] | dict[str, list[str]],
    path: Path,
) -> None:
    """
    Generate baseline file based on passed violations.

    Writes every violation in ``violationsAllFiles`` to the file at ``path``
    so that future runs can report only the violations that are not
    already recorded in the baseline.

    Parameters:
    - violationsAllFiles: Mapping of file paths to their violations. The
      values may be ``Violation`` objects (fresh check results) or plain
      strings (for example the unfixed entries returned by
      reEvaluateBaseline).
    - path: Path where baseline file should be written

    Returns:
    None: The result is the baseline file written to ``path``

    The baseline file format:
    - Each file section starts with the file path
    - Violations are indented with 4 spaces
    - Each file section is followed by a line of 20 dashes
    """

def parseBaseline(path: Path) -> dict[str, list[str]]:
    """
    Parse existing baseline file.

    Reads a baseline file in the format written by generateBaseline and
    groups its violation entries under the file path they belong to.

    Parameters:
    - path: Path to baseline file to parse

    Returns:
    dict[str, list[str]]: Mapping of file paths to violation strings

    Raises:
    FileNotFoundError: If baseline file doesn't exist
    """

def reEvaluateBaseline(
    baseline: dict[str, list[str]],
    actualViolationsInAllFiles: dict[str, list[Violation]],
) -> tuple[bool, dict[str, list[str]], dict[str, list[Violation]]]:
    """
    Compare current violations against baseline and determine changes.

    Splits the comparison into three results:
    - whether the baseline is out of date because recorded violations
      were fixed
    - which baseline violations are still present
    - which violations are new (not recorded in the baseline)

    Parameters:
    - baseline: Parsed baseline violations by file (the mapping returned
      by parseBaseline)
    - actualViolationsInAllFiles: Current violations found in files

    Returns:
    tuple containing:
    - bool: Whether baseline regeneration is needed
    - dict[str, list[str]]: Unfixed baseline violations still present;
      this mapping can be passed to generateBaseline to regenerate the file
    - dict[str, list[Violation]]: New violations not in baseline
    """

Baseline File Format Constants

Constants defining the baseline file format structure.

SEPARATOR: str  # "--------------------\n": 20 dashes plus a newline, ends each file section
LEN_INDENT: int  # 4: number of spaces used to indent a violation line
ONE_SPACE: str  # " ": a single space character
INDENT: str  # "    ": the 4-space prefix of every violation line

Usage Examples

Basic Baseline Workflow

# Step 1: Generate initial baseline from current violations
# (--generate-baseline is a boolean option and takes an explicit value,
# just like --auto-regenerate-baseline below)
pydoclint --generate-baseline=True --baseline=violations-baseline.txt src/

# Step 2: Run normally - only new violations reported
pydoclint --baseline=violations-baseline.txt src/

# Step 3: Auto-regenerate baseline when violations are fixed
pydoclint --baseline=violations-baseline.txt --auto-regenerate-baseline=True src/

Programmatic Baseline Management

from pathlib import Path
from pydoclint.baseline import generateBaseline, parseBaseline, reEvaluateBaseline
from pydoclint.main import _checkPaths

# Lint the sources once and snapshot every violation as the initial baseline
violations = _checkPaths(paths=("src/",), style="numpy")
baseline_path = Path("current-violations.txt")
generateBaseline(violations, baseline_path)
initial_count = sum(map(len, violations.values()))
print(f"Generated baseline with {initial_count} violations")

# Later: lint again to obtain the current set of violations
current_violations = _checkPaths(paths=("src/",), style="numpy")

# Load the stored baseline and diff the current violations against it
baseline = parseBaseline(baseline_path)
comparison = reEvaluateBaseline(baseline, current_violations)
needs_regen, unfixed_baseline, new_violations = comparison

# Rewrite the baseline so it keeps only the entries that are still unfixed
if needs_regen:
    print("Some violations were fixed - baseline needs regeneration")
    generateBaseline(unfixed_baseline, baseline_path)

new_count = sum(map(len, new_violations.values()))
print(f"New violations: {new_count}")

Baseline File Format Example

src/module.py
    15: DOC101: Docstring contains fewer arguments than in function signature.
    23: DOC201: does not have a return section in docstring
    45: DOC103: Docstring arguments are different from function arguments.
--------------------
src/utils.py
    8: DOC102: Docstring contains more arguments than in function signature.
    34: DOC105: Argument names match, but type hints in these args do not match: x
--------------------

Configuration-Based Baseline

# pyproject.toml
[tool.pydoclint]
style = "google"  # docstring style to check against
baseline = "pydoclint-violations.txt"  # baseline file, same role as --baseline on the command line
auto-regenerate-baseline = true  # same role as --auto-regenerate-baseline on the command line
exclude = "tests/|migrations/"  # presumably a regex of paths to skip - confirm against the pydoclint docs
# Shell commands (not part of pyproject.toml)
# Configuration automatically handles baseline
pydoclint src/  # Uses baseline from config

# Generate new baseline
# (--generate-baseline is a boolean option and takes an explicit value)
pydoclint --generate-baseline=True src/

Advanced Baseline Workflows

Gradual Migration Strategy

# Phase 1: Generate baseline for entire codebase
# (--generate-baseline is a boolean option and takes an explicit value)
pydoclint --generate-baseline=True --baseline=phase1-baseline.txt .

# Phase 2: Fix critical violations, update baseline
# List the existing DOC1xx violations WITHOUT the baseline: a run against the
# baseline generated in Phase 1 reports only new violations, so it would
# print none of the ones that need fixing.
pydoclint . 2>&1 | grep "DOC1" > critical-violations.txt
# Fix DOC1xx violations manually
pydoclint --generate-baseline=True --baseline=phase2-baseline.txt .

# Phase 3: Continue incremental improvement
pydoclint --baseline=phase2-baseline.txt --auto-regenerate-baseline=True .

Per-Module Baselines

# Create separate baselines for different modules
# (--generate-baseline is a boolean option and takes an explicit value)
pydoclint --generate-baseline=True --baseline=core-baseline.txt src/core/
pydoclint --generate-baseline=True --baseline=utils-baseline.txt src/utils/
pydoclint --generate-baseline=True --baseline=api-baseline.txt src/api/

# Check modules independently
pydoclint --baseline=core-baseline.txt src/core/
pydoclint --baseline=utils-baseline.txt src/utils/
pydoclint --baseline=api-baseline.txt src/api/

CI/CD Integration

# .github/workflows/docstring-check.yml
# Runs pydoclint on every push and pull request.
name: Docstring Check
on: [push, pull_request]

jobs:
  docstring-lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'
      - name: Install pydoclint
        run: pip install pydoclint
      - name: Check docstrings against baseline
        # Use the baseline when the file exists in the checkout, so only
        # violations missing from it are reported; otherwise lint src/
        # with no baseline at all.
        run: |
          if [ -f pydoclint-baseline.txt ]; then
            pydoclint --baseline=pydoclint-baseline.txt src/
          else
            pydoclint src/
          fi

Baseline Maintenance

# Script to maintain baseline health
from pathlib import Path
from pydoclint.baseline import parseBaseline, generateBaseline, reEvaluateBaseline
from pydoclint.main import _checkPaths

def maintain_baseline(baseline_path: Path, source_paths: tuple[str, ...]) -> bool:
    """
    Maintain baseline by cleaning up fixed violations.

    Checks ``source_paths`` and then either creates the baseline (when no
    baseline file exists yet) or rewrites it without the entries that have
    been fixed since it was generated.

    Parameters:
    - baseline_path: Path of the baseline file to create or update
    - source_paths: Files or directories to check

    Returns:
    bool: True when no violations outside the baseline were found
    (including the case where a fresh baseline was just generated),
    False when new violations exist
    """

    # Get current violations
    current_violations = _checkPaths(source_paths, style="numpy")

    if not baseline_path.exists():
        print("No baseline exists, generating new one")
        generateBaseline(current_violations, baseline_path)
        # Every current violation is now recorded in the baseline, so none
        # of them is "new"; report success instead of an implicit None.
        return True

    # Parse existing baseline
    baseline = parseBaseline(baseline_path)

    # Check if baseline needs update
    needs_regen, unfixed_baseline, new_violations = reEvaluateBaseline(
        baseline, current_violations
    )

    if needs_regen:
        # Keep only the baseline entries that are still present in the code
        print(f"Updating baseline - {len(baseline)} -> {len(unfixed_baseline)} files")
        generateBaseline(unfixed_baseline, baseline_path)

    new_count = sum(len(v) for v in new_violations.values())
    if new_count > 0:
        print(f"Found {new_count} new violations not in baseline")
        return False

    return True

# Usage
success = maintain_baseline(Path("violations.txt"), ("src/",))

Install with Tessl CLI

npx tessl i tessl/pypi-pydoclint

docs

baseline.md

cli.md

configuration.md

core-analysis.md

flake8-plugin.md

index.md

utility-apis.md

violations.md

tile.json