tessl/pypi-semgrep

Lightweight static analysis for many languages, with a programmatic Python API for custom integrations.

—

Pending

Overview

Eval results

Files

Rules and Matches

Name: tessl/pypi-semgrep
Author: tessl

Classes and functions for working with semgrep rules and processing scan results, including rule validation and match filtering.

Capabilities

Rule Management

Core rule representation and manipulation.

class Rule:
    """
    Represents a semgrep rule with all its properties.

    This is an interface stub: every method body is elided (``...``), so
    only the signatures and the attribute list below are specified here.

    Attributes:
    - id (str): Unique rule identifier
    - message (str): Human-readable description of what the rule finds
    - languages (list): Programming languages this rule applies to
    - severity (str): Rule severity level (INFO, WARNING, ERROR)
    - pattern (str): Primary pattern to match
    - patterns (list): Complex pattern combinations
    - metadata (dict): Additional rule metadata
    - paths (dict): File path inclusion/exclusion patterns
    - fix (str): Suggested fix for autofix functionality
    """
    # raw: the rule definition as a string-keyed dictionary.
    # yaml: optional YamlTree[YamlMap], defaults to None; presumably the parsed
    #       YAML node the rule was loaded from -- TODO confirm.
    def __init__(self, raw: Dict[str, Any], yaml: Optional[YamlTree[YamlMap]] = None) -> None: ...

    # NOTE(review): return types and raised exceptions are not shown for the
    # methods below; the notes are assumptions drawn from the names.
    # Presumably checks this rule for validity -- TODO confirm failure behavior.
    def validate(self): ...
    # Presumably returns the `languages` attribute -- TODO confirm.
    def get_languages(self): ...
    # Presumably reports whether `language` is one of the rule's languages.
    def matches_language(self, language): ...
    # Presumably returns the `severity` attribute -- TODO confirm.
    def get_severity(self): ...

Match Processing

Classes for representing and processing scan results.

class RuleMatch:
    """
    Represents a single finding from a semgrep scan.

    This is an interface stub: every method body is elided (``...``), so
    only the signatures and the attribute list below are specified here.

    Attributes:
    - match (dict): Core match data with location and content
    - message (str): Human-readable match message
    - severity (str): Match severity (INFO, WARNING, ERROR)
    - metadata (dict): Match metadata and extra information
    - path (str): File path where match was found
    - start (dict): Start position (line, col, offset)
    - end (dict): End position (line, col, offset)
    - extra (dict): Additional match information
    - check_id (str): Rule ID that generated this match
    - fix (str): Suggested fix text if available
    """
    # match_dict: presumably the raw dictionary for one finding, from which
    # the attributes above are populated -- TODO confirm the expected keys.
    def __init__(self, match_dict): ...

    # NOTE(review): return types are not shown for the methods below; the
    # notes are assumptions drawn from the names.
    # Presumably returns the source lines covered by the match.
    def get_lines(self): ...
    # Presumably returns the matched code as text.
    def get_code_snippet(self): ...
    # Presumably reports whether a suggested fix (`fix`) is available.
    def has_fix(self): ...
    # Presumably serializes this match to a dictionary.
    def to_dict(self): ...

class RuleMatches:
    """
    Collection of rule matches with filtering and sorting capabilities.

    Provides methods for filtering by severity, file patterns, and rule IDs,
    plus sorting by file and by severity. This is an interface stub: every
    method body is elided (``...``).
    """
    # matches: optional initial contents, defaults to None. The usage example
    # in this document passes a list of scan results.
    def __init__(self, matches=None): ...

    # NOTE(review): return types are not shown for the methods below, nor
    # whether filters/sorts return a new collection or mutate in place.
    # Presumably appends a single match to the collection.
    def add_match(self, match): ...
    # severities: the usage example in this document passes a list of severity
    # values (e.g. [SeverityLevel.ERROR]) and iterates over the result.
    def filter_by_severity(self, severities): ...
    # pattern: presumably a file-path pattern; syntax (glob vs. regex) not shown.
    def filter_by_path_pattern(self, pattern): ...
    # rule_ids: presumably a collection of rule identifiers to keep.
    def filter_by_rule_ids(self, rule_ids): ...
    def sort_by_file(self): ...
    def sort_by_severity(self): ...
    # Presumably serializes the collection to a dictionary.
    def to_dict(self): ...

Rule Validation

Functions for validating rule syntax and schema compliance.

def validate_single_rule(rule_dict: dict) -> "Rule":
    """
    Validate a single rule against the semgrep schema.

    Interface stub: only the contract is specified here, not the
    implementation.

    Parameters:
    - rule_dict (dict): Rule definition to validate

    Returns:
    Rule: Validated rule object

    Raises:
    InvalidRuleSchemaError: If rule validation fails
    """

def validate_rule_schema(rules: list) -> list:
    """
    Validate multiple rules against schema.

    Interface stub: only the contract is specified here, not the
    implementation.

    Parameters:
    - rules (list): List of rule dictionaries

    Returns:
    list: List of validated Rule objects

    Raises:
    InvalidRuleSchemaError: If any rule validation fails
    """

Types

Rule Pattern Types

class PatternType:
    """
    Enumeration of pattern types used in rules.

    Declared as a plain class holding string constants (it does not derive
    from ``enum.Enum``), so each member is an ordinary ``str``.

    Values:
    - PATTERN: Simple pattern matching
    - PATTERN_EITHER: Match any of multiple patterns
    - PATTERN_ALL: Match all patterns
    - PATTERN_NOT: Exclude matches for pattern
    - PATTERN_INSIDE: Pattern must be inside another pattern
    - PATTERN_REGEX: Regular expression pattern
    """
    PATTERN = "pattern"
    PATTERN_EITHER = "pattern-either"
    # NOTE(review): the Rule attributes in this document list `patterns` for
    # pattern combinations; confirm that "pattern-all" is the intended value.
    PATTERN_ALL = "pattern-all"
    PATTERN_NOT = "pattern-not"
    PATTERN_INSIDE = "pattern-inside"
    PATTERN_REGEX = "pattern-regex"

class SeverityLevel:
    """
    Rule and match severity levels.

    Declared as a plain class holding string constants (it does not derive
    from ``enum.Enum``), so each member is an ordinary ``str`` equal to its
    own name.

    Values:
    - INFO: Informational findings
    - WARNING: Potential issues that should be reviewed
    - ERROR: Definite issues that should be fixed
    """
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"

Match Location Types

class Position:
    """
    Represents a position in source code.

    The class body contains annotations only: no defaults and no constructor
    are declared here.

    Attributes:
    - line (int): Line number (1-based)
    - col (int): Column number (1-based)
    - offset (int): Character offset from file start
    """
    line: int
    col: int
    offset: int

class Location:
    """
    Represents a location span in source code.

    The class body contains annotations only: no defaults and no constructor
    are declared here.

    Attributes:
    - start (Position): Start position
    - end (Position): End position
    - path (str): File path
    """
    start: Position
    end: Position
    path: str

Usage Examples

Working with Rules

from semgrep.rule import Rule
from semgrep.config_resolver import validate_single_rule

# Create rule from dictionary. The keys mirror the Rule attributes
# (id, pattern, message, languages, severity).
rule_dict = {
    "id": "hardcoded-password",
    # `$VALUE` is a Semgrep metavariable standing in for the assigned string.
    "pattern": "password = \"$VALUE\"",
    "message": "Hardcoded password found",
    "languages": ["python"],
    "severity": "ERROR"
}

# Returns a validated Rule; documented to raise InvalidRuleSchemaError when
# the dictionary does not conform to the schema.
rule = validate_single_rule(rule_dict)
print(f"Rule ID: {rule.id}")
print(f"Languages: {rule.get_languages()}")
print(f"Severity: {rule.get_severity()}")

Processing Scan Results

from semgrep.rule_match import RuleMatches, SeverityLevel

# Assume we have scan results
# NOTE(review): `run_scan_and_return_json`, `target_manager` and `config` are
# not imported or defined in this snippet; they must be supplied by the
# surrounding code -- TODO confirm the import path and call signature.
results = run_scan_and_return_json(target_manager, config)

# Create RuleMatches collection
# Falls back to an empty list when the output has no 'results' key.
matches = RuleMatches(results.get('results', []))

# Filter high severity matches
high_severity_matches = matches.filter_by_severity([SeverityLevel.ERROR])

# Process each match
for match in high_severity_matches:
    print(f"File: {match.path}")
    # `start` is documented as a dict with line/col/offset keys.
    print(f"Line: {match.start['line']}")
    print(f"Message: {match.message}")
    print(f"Code: {match.get_code_snippet()}")
    
    # Only print a fix when the match carries one.
    if match.has_fix():
        print(f"Suggested fix: {match.fix}")

Install with Tessl CLI