CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-lizard

An extensible cyclomatic complexity analyzer for many programming languages including C/C++, Java, JavaScript, Python, Ruby, Swift, and more.

Pending
Overview
Eval results
Files

docs/utility-functions.md

Utility Functions

Helper functions for file processing, filtering, and output formatting. These utilities support the core analysis functionality with file discovery, result filtering, and various output options.

Capabilities

File Discovery

Functions for finding and filtering source files for analysis.

def get_all_source_files(paths, exclude_patterns, lans):
    """Discover the source files under the given paths for analysis.

    Walks each path recursively (with gitignore support) and yields the
    files that match the requested languages and are not excluded by
    any of the glob patterns.

    Args:
        paths (list): File or directory paths to search.
        exclude_patterns (list): Glob patterns; matching files are skipped.
        lans (list): Language names to include, or None for all languages.

    Returns:
        iterator: Lazily-produced paths of the matching source files.

    Example:
        for filepath in get_all_source_files(
                ['src/', 'lib/'],
                ['*test*', '*.min.js', 'build/*'],
                ['python', 'javascript']):
            print(f"Found: {filepath}")
    """

Result Filtering

Functions for filtering analysis results based on thresholds and criteria.

def warning_filter(option, module_infos):
    """Yield every function whose metrics break a configured limit.

    Args:
        option: Settings object carrying the thresholds (CCN, length, etc.).
        module_infos: Iterable of FileInformation analysis results.

    Returns:
        generator: The functions exceeding at least one threshold.

    Example:
        # with option.CCN = 10 and option.length = 50
        for func_info in warning_filter(options, analysis_results):
            print(f"Warning: {func_info.name} exceeds thresholds")
    """

def whitelist_filter(warnings, script=None, whitelist=None):
    """Drop the warnings that the whitelist configuration suppresses.

    Args:
        warnings: Iterable of warning objects to screen.
        script (str): Optional path to a whitelist script.
        whitelist (str): Optional path to a whitelist file
            (defaults to "whitelizard.txt").

    Returns:
        generator: Only the warnings not covered by the whitelist.

    Example:
        for warning in whitelist_filter(warnings, whitelist="ignore.txt"):
            print(f"Genuine warning: {warning}")
    """

File Hashing

Function for generating file hashes for duplicate detection.

def md5_hash_file(full_path_name):
    """Return the MD5 digest of a file's content for duplicate detection.

    Args:
        full_path_name (str): Full path of the file to hash.

    Returns:
        str: MD5 hash string of the file content.

    Example:
        if md5_hash_file('src/file1.py') == md5_hash_file('src/file2.py'):
            print("Files are identical")
    """

Output Functions

Functions for formatting and displaying analysis results in different styles.

def print_clang_style_warning(code_infos, option, scheme, _):
    """Emit warnings using the clang/gcc compiler message layout.

    Args:
        code_infos: Iterable of code information objects.
        option: Configuration options object.
        scheme: Output formatting scheme.
        _: Ignored; present only to match the printer interface.

    Returns:
        int: Count of warnings written.

    Example Output:
        src/app.py:25: warning: function has high complexity (15)
    """

def print_msvs_style_warning(code_infos, option, scheme, _):
    """Emit warnings using the Microsoft Visual Studio message layout.

    Args:
        code_infos: Iterable of code information objects.
        option: Configuration options object.
        scheme: Output formatting scheme.
        _: Ignored; present only to match the printer interface.

    Returns:
        int: Count of warnings written.

    Example Output:
        src/app.py(25) : warning: function has high complexity (15)
    """

def silent_printer(result, *_):
    """Drain the result iterator while producing no output at all.

    Useful when the analysis is wanted only for its side effects or
    programmatic inspection, not for console display.

    Args:
        result: Iterable of results to consume.
        *_: Extra positional arguments, all ignored.

    Returns:
        int: Always 0.

    Example:
        exit_code = silent_printer(analysis_results)
    """

Threading and Parallel Processing

Functions for managing multi-threaded analysis and parallel file processing.

def map_files_to_analyzer(files, analyzer, working_threads):
    """Run the analyzer over the files with a suitable mapping strategy.

    Args:
        files: Iterable of file paths to process.
        analyzer: FileAnalyzer instance applied to each file.
        working_threads (int): Worker count; 1 means single-threaded.

    Returns:
        iterator: Per-file analysis results.

    Example:
        analyzer = FileAnalyzer([])
        for result in map_files_to_analyzer(
                ['app.py', 'utils.py', 'config.py'], analyzer, 4):
            print(f"Analyzed: {result.filename}")
    """

def get_map_method(working_threads):
    """Pick the map implementation that suits the requested thread count.

    Args:
        working_threads (int): Number of working threads.

    Returns:
        function: multiprocessing.Pool.imap_unordered for multi-threaded
        runs, the built-in map otherwise.

    Example:
        map_func = get_map_method(4)  # pool.imap_unordered
        map_func = get_map_method(1)  # built-in map
    """

def print_extension_results(extensions):
    """Invoke print_result on every extension that provides one.

    Args:
        extensions (list): Extension objects to report on.

    Example:
        print_extension_results(get_extensions(['wordcount', 'duplicate']))
    """

Constants

Default configuration values used throughout the system.

# Default cyclomatic complexity threshold for warnings.
DEFAULT_CCN_THRESHOLD: int = 15

# Default whitelist filename for filtering warnings.
DEFAULT_WHITELIST: str = "whitelizard.txt"

# Default maximum function length threshold.
DEFAULT_MAX_FUNC_LENGTH: int = 1000

Usage Examples

File Discovery with Filtering

from lizard import get_all_source_files

# Patterns for files that should never be analyzed.
EXCLUDES = [
    '*test*',           # test files
    '*Test*',           # Test files
    '*/tests/*',        # tests directories
    '*/node_modules/*', # npm dependencies
    '*/build/*',        # build artifacts
    '*.min.js',         # minified files
    '*/migrations/*',   # database migrations
]

# Find all Python and JavaScript files, excluding tests and build artifacts.
source_files = get_all_source_files(
    paths=['src/', 'lib/', 'app/'],
    exclude_patterns=EXCLUDES,
    lans=['python', 'javascript'],
)

print("Source files found:")
for path in source_files:
    print(f"  {path}")

Threshold-Based Filtering

import lizard
from lizard import warning_filter

class AnalysisOptions:
    """Holds the thresholds that warning_filter reads."""

    def __init__(self):
        self.CCN = 8           # Complexity threshold
        self.length = 40       # Function length threshold
        self.arguments = 4     # Parameter count threshold
        self.nloc = 30         # Lines of code threshold

options = AnalysisOptions()

# Analyze the sources, then keep only the offending functions.
results = lizard.analyze(['src/'])
warnings = warning_filter(options, results)

print("Functions exceeding thresholds:")
for func_info in warnings:
    # Pair each threshold test with its report label, then keep the
    # labels of the limits that were actually exceeded.
    checks = [
        (func_info.cyclomatic_complexity > options.CCN,
         f"CCN={func_info.cyclomatic_complexity}"),
        (func_info.length > options.length,
         f"Length={func_info.length}"),
        (func_info.parameter_count > options.arguments,
         f"Args={func_info.parameter_count}"),
        (func_info.nloc > options.nloc,
         f"NLOC={func_info.nloc}"),
    ]
    issues = [label for exceeded, label in checks if exceeded]
    print(f"  {func_info.name}: {', '.join(issues)}")

Whitelist Filtering

from lizard import warning_filter, whitelist_filter
import lizard

WHITELIST_PATH = 'project_whitelist.txt'

# Describe which warnings should be suppressed.
whitelist_content = """
# Ignore complex legacy functions
src/legacy.py:old_complex_function
src/legacy.py:another_complex_function

# Ignore generated code
src/generated/*

# Ignore specific patterns
*_test.py:*
"""

with open(WHITELIST_PATH, 'w') as handle:
    handle.write(whitelist_content)

# Analyze, then strip whitelisted entries from the warning stream.
# NOTE(review): 'options' here comes from the threshold example above.
results = lizard.analyze(['src/'])
warnings = warning_filter(options, results)
filtered_warnings = whitelist_filter(warnings, whitelist=WHITELIST_PATH)

print("Warnings after whitelist filtering:")
for warning in filtered_warnings:
    print(f"  {warning.name} in {warning.filename}")

File Duplicate Detection

from lizard import md5_hash_file
import os

# File extensions treated as source code for duplicate detection.
SOURCE_EXTENSIONS = ('.py', '.js', '.java', '.cpp')

def find_duplicate_files(directory):
    """Find duplicate files by MD5 hash comparison.

    Args:
        directory (str): Root directory to scan recursively.

    Returns:
        list: (duplicate_path, first_seen_path) tuples for files whose
        content hashes collide.
    """
    file_hashes = {}
    duplicates = []

    for root, _dirs, files in os.walk(directory):
        for name in files:
            if not name.endswith(SOURCE_EXTENSIONS):
                continue
            filepath = os.path.join(root, name)
            try:
                filehash = md5_hash_file(filepath)
            except OSError as e:
                # Narrowed from a bare `except Exception`: only I/O
                # failures are expected while hashing; any other
                # exception should surface as a real bug.
                print(f"Error hashing {filepath}: {e}")
                continue
            if filehash in file_hashes:
                duplicates.append((filepath, file_hashes[filehash]))
            else:
                file_hashes[filehash] = filepath

    return duplicates

# Find duplicates in source directory
duplicates = find_duplicate_files('src/')
if duplicates:
    print("Duplicate files found:")
    for file1, file2 in duplicates:
        print(f"  {file1} == {file2}")
else:
    print("No duplicate files found")

Custom Output Formatting

from lizard import print_clang_style_warning, print_msvs_style_warning
import lizard

class CustomOptions:
    """Threshold settings read by the warning filter."""

    def __init__(self):
        self.CCN = 10
        self.length = 50

class CustomScheme:
    """Formats a single function's metrics for display."""

    def function_info(self, func):
        return f"{func.name}: CCN={func.cyclomatic_complexity}, NLOC={func.nloc}"

options = CustomOptions()
scheme = CustomScheme()

# First pass: clang/gcc-style messages.
results = lizard.analyze(['src/'])
warnings = lizard.warning_filter(options, results)
print("Clang-style warnings:")
clang_count = print_clang_style_warning(warnings, options, scheme, None)
print(f"\nTotal warnings: {clang_count}")

# The warnings generator is now exhausted, so the analysis must be
# re-run before printing in the second format.
warnings = lizard.warning_filter(options, lizard.analyze(['src/']))
print("\nVisual Studio-style warnings:")
msvs_count = print_msvs_style_warning(warnings, options, scheme, None)

Silent Analysis

from lizard import silent_printer
import lizard

# Gather results for programmatic inspection, with no display output.
result_list = list(lizard.analyze(['src/']))
total_files = len(result_list)
total_functions = sum(len(info.function_list) for info in result_list)

print(f"Silent analysis complete:")
print(f"  Files analyzed: {total_files}")
print(f"  Functions found: {total_functions}")

# silent_printer drains an iterator without printing anything.
exit_code = silent_printer(lizard.analyze(['src/']))
print(f"Analysis exit code: {exit_code}")

Install with Tessl CLI

npx tessl i tessl/pypi-lizard

docs

core-analysis.md

data-models.md

extensions-system.md

index.md

language-support.md

utility-functions.md

tile.json