CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-semgrep

Lightweight static analysis for many languages, with a programmatic Python API for custom integrations.

Pending
Overview
Eval results
Files

docs/target-management.md

Target Management

The target management system discovers, filters, and processes files for Semgrep scanning. The target manager handles file discovery, language detection, and exclusion patterns.

Capabilities

Target Manager

Primary class for managing scan targets and file discovery.

class TargetManager:
    """Manage the target files of a scan: discovery plus filtering.

    Covers file discovery, language detection, gitignore patterns,
    and file size limits for efficient scanning.

    Attributes:
        scanning_root (ScanningRoot): Root directory configuration.
        target_files (list): Discovered target files.
        filtered_files (FilteredFiles): Files excluded from scanning.
        include_patterns (list): Patterns for file inclusion.
        exclude_patterns (list): Patterns for file exclusion.
    """

    def __init__(self, scanning_root, **kwargs): ...

    def get_all_targets(self): ...

    def filter_by_language(self, language): ...

    def filter_by_size(self, max_size): ...

    def apply_exclusions(self, patterns): ...

    def get_target_count(self): ...

class ScanningRoot:
    """Describe the root directory a scan starts from.

    Holds the base directory and the scanning parameters used
    for target discovery.

    Attributes:
        path (str): Root directory path.
        respect_gitignore (bool): Whether to respect .gitignore files.
        baseline_handler (BaselineHandler): Handler for baseline comparison.
    """

    def __init__(self, path, **kwargs): ...

    def get_path(self): ...

    def is_valid(self): ...

    def get_gitignore_patterns(self): ...

class TargetScanResult:
    """Outcome of target file scanning and discovery.

    Bundles the discovered files, the exclusions, and metadata
    about the target discovery process.

    Attributes:
        targets (list): List of Target objects.
        filtered_files (FilteredFiles): Excluded files with reasons.
        stats (dict): Discovery statistics.
    """

    def __init__(self, targets, filtered_files): ...

    def get_target_count(self): ...

    def get_filtered_count(self): ...

    def get_total_size(self): ...

Target Information

Classes for representing individual scan targets.

class Target:
    """A single file that is a target for scanning.

    Attributes:
        path (str): File path relative to scanning root.
        language (str): Detected programming language.
        size (int): File size in bytes.
        encoding (str): File encoding (utf-8, etc.).
    """

    def __init__(self, path, language=None): ...

    def get_path(self): ...

    def get_language(self): ...

    def get_size(self): ...

    def is_binary(self): ...

class TargetInfo:
    """Extra metadata attached to a target file.

    Extended information about a file, including modification
    times, permissions, and content analysis.

    Attributes:
        target (Target): Associated target file.
        last_modified (datetime): File modification time.
        permissions (str): File permissions.
        line_count (int): Number of lines in file.
    """

    def __init__(self, target): ...

    def get_metadata(self): ...

    def analyze_content(self): ...

File Filtering

Classes for managing file filtering and exclusions.

class FilteredFiles:
    """Files left out of a scan, together with the reasons why.

    Records which files were excluded and the reason for each
    exclusion, such as size limits, language filters, or ignore
    patterns.

    Attributes:
        excluded_files (list): List of excluded file paths.
        exclusion_reasons (dict): Mapping of files to exclusion reasons.
        patterns_matched (dict): Which patterns matched each file.
    """

    def __init__(self): ...

    def add_excluded_file(self, path, reason): ...

    def get_excluded_count(self): ...

    def get_exclusions_by_reason(self, reason): ...

    def get_exclusion_summary(self): ...

class FileErrorLog:
    """Record of file processing errors hit during target discovery.

    Keeps track of files that couldn't be processed because of
    errors like permission issues, encoding problems, or corruption.

    Attributes:
        error_files (list): Files with processing errors.
        error_details (dict): Detailed error information.
    """

    def __init__(self): ...

    def add_error(self, path, error): ...

    def get_error_count(self): ...

    def get_errors_by_type(self, error_type): ...

class FileTargetingLog:
    """Record of file targeting decisions and statistics.

    Detailed logging of the file discovery process, including
    timing, patterns matched, and decisions made.

    Attributes:
        discovered_files (int): Total files discovered.
        included_files (int): Files included for scanning.
        excluded_files (int): Files excluded from scanning.
        timing_stats (dict): Performance timing information.
    """

    def __init__(self): ...

    def log_discovery(self, file_count): ...

    def log_inclusion(self, path, reason): ...

    def log_exclusion(self, path, reason): ...

    def get_summary(self): ...

Usage Examples

Basic Target Management

from semgrep.target_manager import ScanningRoot, TargetManager

# Scanning root pointing at ./src
root = ScanningRoot(path="./src", respect_gitignore=True)

# Target manager configured with a size cap and include/exclude patterns
target_manager = TargetManager(
    scanning_root=root,
    max_target_bytes=1_000_000,  # 1MB limit
    include_patterns=["*.py", "*.js"],
    exclude_patterns=["**/node_modules/**"],
)

# Run discovery and report how many targets came back
targets = target_manager.get_all_targets()
print(f"Found {len(targets)} files to scan")

# Narrow the targets down to one language
python_targets = target_manager.filter_by_language("python")
print(f"Python files: {len(python_targets)}")

Advanced Target Filtering

from semgrep.target_manager import TargetManager, FilteredFiles

# NOTE(review): `target_manager` is not created in this snippet; presumably it
# is the TargetManager instance built in the "Basic Target Management"
# example -- confirm.
# NOTE(review): `discover_targets()` is not among the TargetManager methods
# listed in this document; the `.filtered_files` access below suggests it
# returns a TargetScanResult -- verify against the actual API.
# Get target discovery results
result = target_manager.discover_targets()

# Examine excluded files
filtered = result.filtered_files
print(f"Excluded {filtered.get_excluded_count()} files")

# Get exclusions by reason
size_exclusions = filtered.get_exclusions_by_reason("file_too_large")
print(f"Files excluded for size: {len(size_exclusions)}")

# Get exclusion summary
# assumes get_exclusion_summary() returns a mapping of reason -> count,
# since it is iterated with .items() -- TODO confirm
summary = filtered.get_exclusion_summary()
for reason, count in summary.items():
    print(f"{reason}: {count} files")

Working with Individual Targets

from semgrep.target_manager import Target, TargetInfo

# Build a target for one Python source file
target = Target(path="src/main.py", language="python")

# Wrap it in TargetInfo and fetch its metadata
info = TargetInfo(target)
metadata = info.get_metadata()

# Report the target's own accessors first, then the metadata entries
print(f"File: {target.get_path()}")
print(f"Language: {target.get_language()}")
print(f"Size: {target.get_size()} bytes")
print(f"Last modified: {metadata['last_modified']}")
print(f"Line count: {metadata['line_count']}")

Install with Tessl CLI

npx tessl i tessl/pypi-semgrep

docs

cicd-integration.md

configuration.md

core-scanning.md

error-handling.md

index.md

output-formatting.md

rules-matches.md

target-management.md

tile.json