Lightweight static analysis for many languages, with a programmatic Python API for custom integrations.
—
The core scanning engine provides the main functionality for running semgrep analysis on codebases. It handles target discovery, rule execution, baseline comparison, and result aggregation.
from semgrep.run_scan import run_scan, run_scan_and_return_json, baseline_run
from semgrep.output import OutputHandler, OutputSettings
from semgrep.target_manager import TargetManager
from semgrep.rule_match import RuleMatchMap
from semgrep.baseline import BaselineHandler
from semgrep.semgrep_types import EngineType
from pathlib import Path
from typing import List, Optional, Sequence, FrozenSet, Union, Any, Dict

Primary functions for executing semgrep scans with full configuration support.
def run_scan(
*,
dump_command_for_core: bool = False,
time_flag: bool = False,
matching_explanations: bool = False,
engine_type: EngineType = EngineType.OSS,
run_secrets: bool = False,
output_handler: OutputHandler,
scanning_roots: Sequence[str],
**kwargs
):
"""
Execute a semgrep scan with comprehensive configuration options.
Key Parameters (keyword-only):
- output_handler (OutputHandler): Required handler for output formatting
- scanning_roots (Sequence[str]): Required sequence of paths to scan
- dump_command_for_core (bool): Debug flag for core command dumping
- time_flag (bool): Enable timing information
- matching_explanations (bool): Include pattern matching explanations
- engine_type (EngineType): Scan engine type (OSS by default)
- run_secrets (bool): Enable secrets scanning
- **kwargs: Many additional configuration parameters
Returns:
Complex tuple with scan results, errors, statistics, and metadata
Note: This function has 50+ parameters. See source code for complete signature.
"""
def run_scan_and_return_json(
*,
config: Path,
scanning_roots: List[Path],
output_settings: Optional[OutputSettings] = None,
**kwargs: Any
) -> Union[Dict[str, Any], str]:
"""
Execute a semgrep scan and return results as JSON.
Parameters (keyword-only):
- config (Path): Path to configuration file
- scanning_roots (List[Path]): List of paths to scan
- output_settings (OutputSettings, optional): Output formatting configuration
- **kwargs: Additional scan parameters passed to run_scan
Returns:
Union[Dict[str, Any], str]: Scan results in JSON format or JSON string
"""

Functions for comparing current scan results against a baseline to identify new findings.
def baseline_run(
baseline_handler: BaselineHandler,
baseline_commit: Optional[str],
rule_matches_by_rule: RuleMatchMap,
all_subprojects: List[Union[out.UnresolvedSubproject, out.ResolvedSubproject]],
scanning_root_strings: FrozenSet[Path],
**kwargs
):
"""
Execute a baseline scan to compare against previous results.
Parameters:
- baseline_handler (BaselineHandler): Handler for baseline comparison logic
- baseline_commit (Optional[str]): Git commit hash for baseline comparison
- rule_matches_by_rule (RuleMatchMap): Current scan results by rule
- all_subprojects (List): List of project and subproject configurations
- scanning_root_strings (FrozenSet[Path]): Set of scanning root paths
- **kwargs: Additional scan parameters
Returns:
Baseline comparison results and metadata
"""

Functions for dependency-aware rule filtering and dependency resolution.
def filter_dependency_aware_rules(rules, dependencies):
"""
Filter rules based on project dependencies.
Parameters:
- rules (list): List of Rule objects to filter
- dependencies (dict): Project dependency information
Returns:
list: Filtered rules applicable to the dependencies
"""
def resolve_dependencies(target_manager, config):
"""
Resolve project dependencies for dependency-aware analysis.
Parameters:
- target_manager (TargetManager): Target file manager
- config (Config): Scan configuration
Returns:
dict: Resolved dependency information
"""

Helper functions for scan optimization and environment setup.
def adjust_python_recursion_limit(new_limit=None):
"""
Adjust Python recursion limit for deep scanning operations.
Parameters:
- new_limit (int, optional): New recursion limit, defaults to calculated value
Returns:
int: Previous recursion limit
"""

# Import required types
from semgrep.output import OutputHandler, OutputSettings
from semgrep.rule_match import RuleMatchMap
from semgrep.baseline import BaselineHandler
from semgrep.semgrep_types import EngineType
from semgrep import semgrep_output_v1 as out
from pathlib import Path
from typing import Tuple, List, Dict, Set, Any, Union, Optional
# Return types for run_scan are complex tuples containing:
# - FilteredMatches: Processed rule matches
# - List[SemgrepError]: Any errors encountered
# - Set[Path]: Files that were processed
# - FileTargetingLog: File targeting information
# - List[Rule]: Rules that were executed
# - ProfileManager: Performance profiling data
# - OutputExtra: Additional output metadata
# - Collection[out.MatchSeverity]: Severity information
# - Dict with dependency information
# - Various counts and subproject information

Install with Tessl CLI
npx tessl i tessl/pypi-semgrep