tessl/pypi-pdoc

API documentation generator for Python projects, with a focus on simplicity and automatic HTML generation from docstrings

—

Pending

Overview

Eval results

Files

Search Functionality

Name: tessl/pypi-pdoc
Author: tessl

Generates a client-side search index so that rendered HTML documentation can be searched quickly. It provides full-text search of docstrings, signatures, and identifiers without requiring a server backend.

Capabilities

Search Index Generation

Create searchable indices from documentation objects for client-side search.

def make_index(all_modules: dict[str, doc.Module]) -> dict:
    """
    Assemble the search index data structure for a set of documented modules.

    Parameters:
    - all_modules: dict[str, doc.Module] - Every module that should be searchable

    Returns:
    - dict: Search index data structure with these entries:
        - 'identifiers': all searchable identifiers, as a list
        - 'docstrings': the data backing full-text search over docstrings
        - 'signatures': signatures of functions and methods
        - 'modules': information about the module hierarchy

    Features:
    - Docstrings and comments are indexed for full-text search
    - Identifiers can be looked up with fuzzy matching
    - Module and class structure is kept hierarchical
    - Type annotations are included

    NOTE(review): confirm this signature and return shape against the
    installed pdoc release.
    """

def precompile_index(all_modules: dict[str, doc.Module]) -> dict:
    """
    Produce a precompiled search index tuned for client-side performance.

    Parameters:
    - all_modules: dict[str, doc.Module] - The modules to be indexed

    Returns:
    - dict: The precompiled search index, stored in optimized data structures

    Features:
    - Search data is compressed so it loads faster
    - Search rankings are computed ahead of time
    - Layout is optimized for consumption from JavaScript
    - Search metadata and configuration are part of the result

    NOTE(review): confirm this signature and return shape against the
    installed pdoc release.
    """

JavaScript Search Index

Generate JavaScript code containing search index for HTML documentation.

def search_index(all_modules: dict[str, doc.Module]) -> str:
    """
    Emit the JavaScript search index used for client-side search.

    Parameters:
    - all_modules: dict[str, doc.Module] - Every module that should be searchable

    Returns:
    - str: JavaScript source that defines the search index and the search functions

    Features:
    - The JavaScript search engine is self-contained
    - It needs no external dependencies
    - Fuzzy matching and ranking algorithms are included
    - Autocomplete and suggestions are supported

    NOTE(review): confirm this signature and return shape against the
    installed pdoc release.
    """

Usage Examples

Basic Search Index Creation

from pdoc.search import make_index, search_index
from pdoc.doc import Module

# Collect several modules so that one index covers the whole package
modules = {}
modules["core"] = Module.from_name("my_package.core")
modules["utils"] = Module.from_name("my_package.utils")
modules["plugins"] = Module.from_name("my_package.plugins")

# Build the plain data-structure form of the index
index_data = make_index(modules)

# Report how much ended up in the index
print("Search index contents:")
print(f"Identifiers: {len(index_data['identifiers'])}")
print(f"Modules: {len(index_data['modules'])}")

# Build the JavaScript form that ships with the HTML documentation
js_search = search_index(modules)
print(f"JavaScript search code: {len(js_search)} characters")

Comprehensive Documentation with Search

from pdoc import render, doc
from pdoc.search import search_index
from pathlib import Path

def generate_searchable_docs(module_names: list[str], output_dir: str):
    """Generate complete documentation with search functionality.

    Loads every named module, renders one HTML page per module plus an
    index page, writes the JavaScript search index next to them, and
    prints a short summary.

    Parameters:
    - module_names: list[str] - Names handed to ``doc.Module.from_name``
    - output_dir: str - Directory receiving the generated files; it is
      created, together with any missing parent directories, if absent

    Files written (all encoded as UTF-8):
    - "<module name>.html" for each entry of ``module_names``
    - "index.html"
    - "search.js"
    """
    # Load all modules
    all_modules = {name: doc.Module.from_name(name) for name in module_names}

    # Configure rendering
    render.configure(
        docformat="google",
        show_source=True,
        math=True,
    )

    # parents=True: a nested target such as "build/site/docs" must work even
    # when none of the intermediate directories exist yet.
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Generate HTML for each module.
    # Explicit UTF-8: the platform default encoding may be unable to
    # represent non-ASCII characters coming from docstrings.
    # NOTE(review): dotted module names yield flat file names such as
    # "my_package.core.html" — confirm the rendered cross-module links
    # resolve against this layout.
    for module_name, module_obj in all_modules.items():
        html = render.html_module(module_obj, all_modules)
        (output_path / f"{module_name}.html").write_text(html, encoding="utf-8")

    # Generate index page
    index_html = render.html_index(all_modules)
    (output_path / "index.html").write_text(index_html, encoding="utf-8")

    # Generate search functionality
    search_js = search_index(all_modules)
    (output_path / "search.js").write_text(search_js, encoding="utf-8")

    print(f"Documentation with search generated in {output_dir}")
    print(f"Modules: {', '.join(module_names)}")
    print(f"Search index size: {len(search_js)} characters")

# Usage: document the package and two of its submodules into ./docs
generate_searchable_docs(
    module_names=["my_package", "my_package.core", "my_package.utils"],
    output_dir="./docs",
)

Custom Search Index Processing

import json
import time

from pdoc.doc import Module
from pdoc.search import make_index, precompile_index

def create_custom_search_index(modules: dict[str, Module]) -> dict:
    """Create custom search index with additional metadata.

    Starts from the index returned by ``make_index`` and adds two entries.

    Parameters:
    - modules: dict[str, Module] - Modules to index

    Returns:
    - dict: Every entry of the base index, plus:
        - 'custom_metadata': generation timestamp (``time.time()``),
          number of modules, and number of identifiers
        - 'categories': the base index's identifiers grouped into
          'functions', 'classes', 'variables' and 'modules' according
          to each identifier's "type" value; identifiers of any other
          type are left out of the grouping

    Raises:
    - KeyError: if an identifier entry has no "type" key
    """
    # Generate base index
    base_index = make_index(modules)
    identifiers = base_index["identifiers"]

    # Add custom search metadata (requires the module-level `import time`)
    custom_metadata = {
        "generation_time": time.time(),
        "module_count": len(modules),
        "total_identifiers": len(identifiers),
    }

    # Identifier "type" value -> name of the bucket that collects it
    bucket_for_type = {
        "function": "functions",
        "class": "classes",
        "variable": "variables",
        "module": "modules",
    }

    # Add category-based grouping; every bucket exists even when it stays empty
    categories = {bucket: [] for bucket in bucket_for_type.values()}
    for identifier in identifiers:
        bucket = bucket_for_type.get(identifier["type"])
        if bucket is not None:
            categories[bucket].append(identifier)

    return {
        **base_index,
        "custom_metadata": custom_metadata,
        "categories": categories,
    }

# Build the enhanced index for the standard-library ``math`` module
modules = {"math": Module.from_name("math")}
enhanced_index = create_custom_search_index(modules)

# Persist the index as indented JSON so it can be inspected by hand
with open("search_index.json", "w") as index_file:
    index_file.write(json.dumps(enhanced_index, indent=2))

Search Performance Analysis

from pdoc.search import make_index, precompile_index
from pdoc.doc import Module
import time

def benchmark_search_generation(module_names: list[str]):
    """Benchmark search index generation performance.

    Times three phases — loading the modules, ``make_index`` and
    ``precompile_index`` — printing each duration, then prints the size
    of both indices (length of their ``str()`` form) and their ratio.

    Parameters:
    - module_names: list[str] - Names handed to ``Module.from_name``

    Durations are taken with ``time.perf_counter()``: unlike
    ``time.time()`` it is monotonic and uses the highest-resolution
    clock available, so system clock adjustments cannot skew the result.
    """
    # Load modules
    print("Loading modules...")
    start_time = time.perf_counter()

    modules = {name: Module.from_name(name) for name in module_names}

    load_time = time.perf_counter() - start_time
    print(f"Module loading: {load_time:.2f}s")

    # Generate standard index
    print("Generating standard search index...")
    start_time = time.perf_counter()

    standard_index = make_index(modules)

    standard_time = time.perf_counter() - start_time
    print(f"Standard index: {standard_time:.2f}s")

    # Generate precompiled index
    print("Generating precompiled search index...")
    start_time = time.perf_counter()

    precompiled_index = precompile_index(modules)

    precompiled_time = time.perf_counter() - start_time
    print(f"Precompiled index: {precompiled_time:.2f}s")

    # Compare sizes, using the length of the textual form as a rough measure
    standard_size = len(str(standard_index))
    precompiled_size = len(str(precompiled_index))

    print("\nIndex comparison:")
    print(f"Standard size: {standard_size:,} characters")
    print(f"Precompiled size: {precompiled_size:,} characters")
    print(f"Size ratio: {precompiled_size/standard_size:.2f}x")

# Run the benchmark against a few standard-library modules
benchmark_search_generation(module_names=["json", "urllib", "pathlib"])

Advanced Search Features

from pdoc.search import make_index
from pdoc.doc import Module
import re

class AdvancedSearchIndex:
    """Advanced search index with custom features.

    Wraps the index produced by ``make_index`` and derives three extra
    lookup structures from its ``"identifiers"`` entries:

    - ``tag_index``: ``@tag`` found in a docstring -> names of the
      identifiers whose docstring contains it
    - ``similarity_map``: identifier name -> up to five other names that
      share a 3-character prefix or suffix with it
    - ``usage_patterns``: import statements, "example" mentions and
      error-handling mentions found in docstrings

    Each identifier entry is read as a dict with a ``"name"`` key and an
    optional ``"docstring"`` key.
    """

    def __init__(self, modules: "dict[str, Module]"):
        # The annotation is quoted so that defining the class does not need
        # pdoc to be importable; pdoc is only required once an instance is built.
        self.base_index = make_index(modules)
        self.modules = modules
        self._build_advanced_features()

    def _build_advanced_features(self):
        """Build advanced search features"""
        self.tag_index = self._build_tag_index()
        self.similarity_map = self._build_similarity_map()
        self.usage_patterns = self._extract_usage_patterns()

    @staticmethod
    def _docstring_of(identifier: dict) -> str:
        """Return the entry's docstring, treating a missing or None value as empty."""
        return identifier.get("docstring") or ""

    def _build_tag_index(self) -> dict:
        """Build tag-based search index.

        Returns a dict mapping each ``@tag`` word to the names of the
        identifiers whose docstring mentions it, in index order.
        """
        tags = {}

        for identifier in self.base_index["identifiers"]:
            # Extract tags from docstrings
            for tag in re.findall(r'@(\w+)', self._docstring_of(identifier)):
                tags.setdefault(tag, []).append(identifier["name"])

        return tags

    def _build_similarity_map(self) -> dict:
        """Build identifier similarity mapping.

        Two names count as similar when either one starts with the other's
        first three characters or ends with the other's last three.
        Only names with at least one similar name get an entry, and each
        entry keeps at most the first five matches.
        """
        similarity_map = {}
        names = [entry["name"] for entry in self.base_index["identifiers"]]

        for name in names:
            similar = [
                other
                for other in names
                if name != other
                and (
                    # Simple similarity based on common prefixes/suffixes
                    name.startswith(other[:3])
                    or name.endswith(other[-3:])
                    or other.startswith(name[:3])
                    or other.endswith(name[-3:])
                )
            ]

            if similar:
                similarity_map[name] = similar[:5]  # Top 5 similar

        return similarity_map

    def _extract_usage_patterns(self) -> dict:
        """Extract common usage patterns from docstrings.

        Returns a dict with three lists:
        - "common_imports": every ``import <dotted.name>`` found in a docstring
        - "typical_usage": names whose docstring contains "example"
        - "error_handling": names whose docstring contains "raise",
          "except" or "error"

        The two keyword checks are case-insensitive.
        """
        patterns = {
            "common_imports": [],
            "typical_usage": [],
            "error_handling": [],
        }
        error_words = ("raise", "except", "error")

        for identifier in self.base_index["identifiers"]:
            docstring = self._docstring_of(identifier)
            lowered = docstring.lower()

            # Find import patterns
            patterns["common_imports"].extend(re.findall(r'import\s+[\w.]+', docstring))

            # Find usage examples
            if "example" in lowered:
                patterns["typical_usage"].append(identifier["name"])

            # Find error handling mentions
            if any(word in lowered for word in error_words):
                patterns["error_handling"].append(identifier["name"])

        return patterns

    def search(self, query: str, search_type: str = "all") -> list:
        """Perform advanced search with multiple strategies.

        Parameters:
        - query: str - Text to look for; matching is a case-insensitive
          substring test
        - search_type: str - "identifier" to match identifier names,
          "tag" to match tag names, "all" (default) for both

        Returns:
        - list: At most 20 result dicts with "type", "match" and "score"
          keys, ordered by descending score. Tag matches always score 1.0.
          An empty or whitespace-only query yields an empty list.
        """
        needle = query.lower()

        # A blank query is a substring of every name, which used to return
        # the whole index as "matches"; treat it as nothing to search for.
        if not needle.strip():
            return []

        results = []

        if search_type in ("all", "identifier"):
            # Standard identifier search
            results.extend(
                {
                    "type": "identifier",
                    "match": identifier,
                    "score": self._calculate_score(query, identifier["name"]),
                }
                for identifier in self.base_index["identifiers"]
                if needle in identifier["name"].lower()
            )

        if search_type in ("all", "tag"):
            # Tag-based search
            for tag, identifier_names in self.tag_index.items():
                if needle in tag.lower():
                    results.extend(
                        {
                            "type": "tag",
                            "match": {"name": identifier_name, "tag": tag},
                            "score": 1.0,
                        }
                        for identifier_name in identifier_names
                    )

        # Sort by score and return (the sort is stable, so ties keep index order)
        results.sort(key=lambda hit: hit["score"], reverse=True)
        return results[:20]  # Top 20 results

    def _calculate_score(self, query: str, target: str) -> float:
        """Calculate search relevance score.

        Case-insensitive comparison of ``query`` against ``target``:
        1.0 for an exact match, 0.9 when ``target`` starts with ``query``,
        0.7 when ``query`` occurs elsewhere in ``target``, 0.1 otherwise.
        """
        query_lower = query.lower()
        target_lower = target.lower()

        if query_lower == target_lower:
            return 1.0
        if target_lower.startswith(query_lower):
            return 0.9
        if query_lower in target_lower:
            return 0.7
        return 0.1

# Usage: index the standard-library ``json`` module
modules = {"json": Module.from_name("json")}
advanced_search = AdvancedSearchIndex(modules)

# Query the index and show the five best-ranked hits
results = advanced_search.search("load")
top_hits = results[:5]
for result in top_hits:
    print(f"Type: {result['type']}, Match: {result['match']['name']}, Score: {result['score']}")