# Comprehensive LLM evaluation framework with 50+ metrics for testing RAG, chatbots, and AI agents.
# Synthetic test data generation using various evolution strategies (reasoning, multi-context, concretizing, etc.) to create diverse and challenging test cases. Generate goldens from documents, contexts, or from scratch.
from deepeval.synthesizer import (
Synthesizer,
Evolution,
PromptEvolution,
FiltrationConfig,
EvolutionConfig,
StylingConfig,
ContextConstructionConfig
)

# Main class for generating synthetic test data.
class Synthesizer:
    """
    Generates synthetic test data and goldens.

    Parameters:
    - model (Union[str, DeepEvalBaseLLM], optional): Model for generation
    - async_mode (bool): Async mode (default: True)
    - max_concurrent (int): Max concurrent tasks (default: 100)
    - filtration_config (FiltrationConfig, optional): Filtration configuration
    - evolution_config (EvolutionConfig, optional): Evolution configuration
    - styling_config (StylingConfig, optional): Styling configuration
    - cost_tracking (bool): Track API costs (default: False)

    Methods:
    - generate_goldens_from_docs(document_paths, **kwargs) -> List[Golden]
    - a_generate_goldens_from_docs(document_paths, **kwargs) -> List[Golden]
    - generate_goldens_from_contexts(contexts, **kwargs) -> List[Golden]
    - a_generate_goldens_from_contexts(contexts, **kwargs) -> List[Golden]
    - generate_goldens_from_scratch(num_goldens, **kwargs) -> List[Golden]
    - a_generate_goldens_from_scratch(num_goldens, **kwargs) -> List[Golden]
    - generate_goldens_from_goldens(goldens, **kwargs) -> List[Golden]
    - a_generate_goldens_from_goldens(goldens, **kwargs) -> List[Golden]
    - save_as(file_type, directory, file_name=None): Save synthetic goldens
    - to_pandas() -> pd.DataFrame: Convert to pandas DataFrame
    """

# Input evolution strategies for creating diverse test cases.
class Evolution:
    """
    Enum of input evolution strategies.

    Values:
    - REASONING: Add reasoning complexity
    - MULTICONTEXT: Require multiple contexts
    - CONCRETIZING: Make more concrete/specific
    - CONSTRAINED: Add constraints
    - COMPARATIVE: Add comparisons
    - HYPOTHETICAL: Make hypothetical
    - IN_BREADTH: Broaden scope
    """
class PromptEvolution:
    """
    Enum of prompt evolution strategies (for scratch generation).

    Values:
    - REASONING
    - CONCRETIZING
    - CONSTRAINED
    - COMPARATIVE
    - HYPOTHETICAL
    - IN_BREADTH
    """

class FiltrationConfig:
    """
    Configuration for synthetic data filtration.

    Parameters:
    - synthetic_input_quality_threshold (float): Quality threshold (default: 0.5)
    - max_quality_retries (int): Max retries for quality (default: 3)
    - critic_model (Union[str, DeepEvalBaseLLM], optional): Critic model for quality assessment
    """
class EvolutionConfig:
    """
    Configuration for input evolution.

    Parameters:
    - num_evolutions (int): Number of evolution iterations (default: 1)
    - evolutions (Dict[Evolution, float]): Evolution types and weights (default: equal distribution)
    """
class StylingConfig:
    """
    Configuration for output styling.

    Parameters:
    - scenario (str, optional): Scenario description
    - task (str, optional): Task description
    - input_format (str, optional): Input format specification
    - expected_output_format (str, optional): Expected output format
    """
class ContextConstructionConfig:
    """
    Configuration for context construction from documents.

    Parameters:
    - embedder (Union[str, DeepEvalBaseEmbeddingModel], optional): Embedding model
    - critic_model (Union[str, DeepEvalBaseLLM], optional): Critic model
    - encoding (str, optional): Text encoding
    - max_contexts_per_document (int): Max contexts per doc (default: 3)
    - min_contexts_per_document (int): Min contexts per doc (default: 1)
    - max_context_length (int): Max context length in chunks (default: 3)
    - min_context_length (int): Min context length in chunks (default: 1)
    - chunk_size (int): Chunk size in characters (default: 1024)
    - chunk_overlap (int): Chunk overlap (default: 0)
    - context_quality_threshold (float): Quality threshold (default: 0.5)
    - context_similarity_threshold (float): Similarity threshold (default: 0.0)
    - max_retries (int): Max retries (default: 3)
    """

from deepeval.synthesizer import Synthesizer
synthesizer = Synthesizer(model="gpt-4")

# Generate goldens from documents
goldens = synthesizer.generate_goldens_from_docs(
    document_paths=[
        "./docs/product_manual.pdf",
        "./docs/faq.txt",
        "./docs/user_guide.docx",
    ],
    max_goldens_per_context=2,
    include_expected_output=True,
)

print(f"Generated {len(goldens)} goldens")
for golden in goldens[:3]:
    print(f"Input: {golden.input}")
    print(f"Expected: {golden.expected_output}\n")

# Save to file
synthesizer.save_as(
    file_type="json",
    directory="./synthetic_data",
    file_name="doc_goldens",
)

from deepeval.synthesizer import Synthesizer
synthesizer = Synthesizer()

# Generate from predefined contexts
contexts = [
    ["Our return policy allows 30-day full refunds"],
    ["Shipping takes 3-5 business days for US orders"],
    ["Premium members get free expedited shipping"],
]
goldens = synthesizer.generate_goldens_from_contexts(
    contexts=contexts,
    max_goldens_per_context=3,
    include_expected_output=True,
)

from deepeval.synthesizer import Synthesizer, StylingConfig
synthesizer = Synthesizer(
    styling_config=StylingConfig(
        scenario="Customer support for an e-commerce platform",
        task="Answer customer questions about products, shipping, and returns",
        input_format="Natural language questions",
        expected_output_format="Helpful, concise answers",
    )
)

# Generate from scratch using styling config
goldens = synthesizer.generate_goldens_from_scratch(
    num_goldens=50
)
print(f"Generated {len(goldens)} synthetic goldens")

from deepeval.synthesizer import Synthesizer, EvolutionConfig, Evolution
# Configure evolution strategies
evolution_config = EvolutionConfig(
    num_evolutions=2,  # Apply 2 rounds of evolution
    evolutions={
        Evolution.REASONING: 0.3,     # 30% reasoning
        Evolution.MULTICONTEXT: 0.2,  # 20% multi-context
        Evolution.CONCRETIZING: 0.2,  # 20% concretizing
        Evolution.CONSTRAINED: 0.15,  # 15% constrained
        Evolution.COMPARATIVE: 0.15,  # 15% comparative
    },
)
synthesizer = Synthesizer(evolution_config=evolution_config)
goldens = synthesizer.generate_goldens_from_docs(
    document_paths=["./docs/guide.pdf"],
    max_goldens_per_context=3,
)

from deepeval.synthesizer import Synthesizer, FiltrationConfig
# Configure quality filtration
filtration_config = FiltrationConfig(
    synthetic_input_quality_threshold=0.7,  # Higher quality threshold
    max_quality_retries=5,                  # More retry attempts
    critic_model="gpt-4",                   # Use GPT-4 as quality critic
)
synthesizer = Synthesizer(
    filtration_config=filtration_config,
    cost_tracking=True,  # Track API costs
)
goldens = synthesizer.generate_goldens_from_contexts(
    contexts=[["High-quality context about AI"]],
    max_goldens_per_context=5,
)
# Only high-quality goldens will be generated

from deepeval.synthesizer import Synthesizer, ContextConstructionConfig
from deepeval.models import OpenAIEmbeddingModel

# Configure context construction
context_config = ContextConstructionConfig(
    embedder=OpenAIEmbeddingModel(model="text-embedding-3-large"),
    chunk_size=512,                    # Smaller chunks
    chunk_overlap=50,                  # Some overlap
    max_contexts_per_document=5,
    min_context_length=2,              # At least 2 chunks per context
    max_context_length=4,              # At most 4 chunks per context
    context_quality_threshold=0.6,
    context_similarity_threshold=0.3,  # Avoid very similar contexts
)
synthesizer = Synthesizer()
goldens = synthesizer.generate_goldens_from_docs(
    document_paths=["./large_document.pdf"],
    context_construction_config=context_config,
    max_goldens_per_context=3,
)

from deepeval.synthesizer import Synthesizer
from deepeval.dataset import Golden

# Existing goldens
existing_goldens = [
    Golden(input="What is Python?", expected_output="Python is a programming language"),
    Golden(input="What is Java?", expected_output="Java is a programming language"),
]
synthesizer = Synthesizer()

# Generate more goldens based on existing ones
new_goldens = synthesizer.generate_goldens_from_goldens(
    goldens=existing_goldens,
    max_goldens_per_golden=3,  # Generate 3 variations per golden
    include_expected_output=True,
)
print(f"Generated {len(new_goldens)} new goldens from {len(existing_goldens)} existing")

import asyncio
from deepeval.synthesizer import Synthesizer

async def generate_data():
    synthesizer = Synthesizer(
        async_mode=True,
        max_concurrent=50,  # Higher concurrency
    )
    # Async generation
    goldens = await synthesizer.a_generate_goldens_from_docs(
        document_paths=["./doc1.pdf", "./doc2.pdf"],
        max_goldens_per_context=5,
    )
    return goldens

# Run async
goldens = asyncio.run(generate_data())

from deepeval.synthesizer import Synthesizer
synthesizer = Synthesizer()
goldens = synthesizer.generate_goldens_from_scratch(num_goldens=100)
# Save as JSON
synthesizer.save_as(
file_type="json",
directory="./data",
file_name="synthetic_goldens"
)
# Save as CSV
synthesizer.save_as(
file_type="csv",
directory="./data",
file_name="synthetic_goldens"
)
# Convert to pandas DataFrame for analysis
df = synthesizer.to_pandas()
print(df.head())
print(df.describe())from deepeval.synthesizer import (
Synthesizer,
EvolutionConfig,
Evolution,
FiltrationConfig,
StylingConfig,
ContextConstructionConfig
)
from deepeval.models import GPTModel, OpenAIEmbeddingModel
# Configure synthesizer with all options
synthesizer = Synthesizer(
model=GPTModel(model="gpt-4"),
async_mode=True,
max_concurrent=20,
evolution_config=EvolutionConfig(
num_evolutions=2,
evolutions={
Evolution.REASONING: 0.4,
Evolution.MULTICONTEXT: 0.3,
Evolution.CONCRETIZING: 0.3
}
),
filtration_config=FiltrationConfig(
synthetic_input_quality_threshold=0.7,
max_quality_retries=3,
critic_model="gpt-4"
),
styling_config=StylingConfig(
scenario="Technical support for software products",
task="Help users troubleshoot issues",
input_format="User problem descriptions",
expected_output_format="Step-by-step troubleshooting guides"
),
cost_tracking=True
)
# Generate high-quality synthetic data
goldens = synthesizer.generate_goldens_from_docs(
document_paths=["./technical_docs.pdf"],
context_construction_config=ContextConstructionConfig(
embedder=OpenAIEmbeddingModel(),
chunk_size=1024,
max_contexts_per_document=10
),
max_goldens_per_context=2,
include_expected_output=True
)
# Save results
synthesizer.save_as(
file_type="json",
directory="./synthetic_data",
file_name="technical_support_goldens"
)
print(f"Generated {len(goldens)} high-quality synthetic goldens")