LLM Services

LLM services integrate Large Language Model providers into Pipecat pipelines. The framework provides a universal interface supporting 20+ providers including OpenAI, Anthropic, Google, Azure, AWS, and many others.
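
Because every provider implements the same LLMService interface and consumes the same LLMContext, switching providers is typically a one-line change: construct a different service and place it in the same pipeline position. A minimal sketch (API keys and model names are placeholders):

{ .api }
from pipecat.services.openai import OpenAILLMService
from pipecat.services.anthropic import AnthropicLLMService
from pipecat.processors.aggregators.llm_context import LLMContext

context = LLMContext(
    messages=[{"role": "system", "content": "You are a helpful assistant."}]
)

# Either service can fill the same slot in a pipeline because both
# subclass LLMService and share the same context interface.
llm = OpenAILLMService(api_key="sk-...", model="gpt-4")
# llm = AnthropicLLMService(api_key="sk-ant-...", model="claude-3-5-sonnet-20241022")

llm.set_context(context)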

Base LLM Service

LLMService

{ .api }
from pipecat.services.llm_service import LLMService

class LLMService(AIService):
    """Base class for all LLM services.

    Provides universal interface for chat completions, function calling,
    and context management across different LLM providers.

    Key Features:
    - Universal context management via LLMContext
    - Function/tool calling support
    - Streaming responses
    - Event handlers for monitoring
    - Automatic message format conversion via adapters

    Methods:
        set_context(context): Set LLM context
        register_function(name, handler, ...): Register function for calling
        unregister_function(name): Unregister function
        process_frame(frame, direction): Process frames

    Event Handlers:
        on_completion_timeout: LLM completion timeout
        on_function_call_start: Function call initiated
        on_function_call_end: Function call completed

    Example:
        from pipecat.services.openai import OpenAILLMService
        from pipecat.processors.aggregators.llm_context import LLMContext

        context = LLMContext(
            messages=[{"role": "system", "content": "You are helpful."}]
        )

        llm = OpenAILLMService(
            api_key="your-key",
            model="gpt-4"
        )
        llm.set_context(context)
    """

    def __init__(self, **kwargs):
        """Initialize LLM service.

        Args:
            **kwargs: Provider-specific configuration
        """
        pass

    def set_context(self, context: LLMContext):
        """Set LLM context.

        Args:
            context: LLM context with messages, tools, settings
        """
        pass

    def register_function(
        self,
        name: str,
        handler: Callable,
        description: str = "",
        properties: Optional[Dict] = None,
        required: Optional[List[str]] = None
    ):
        """Register function for calling.

        Args:
            name: Function name
            handler: Async function to call
            description: Function description
            properties: JSON schema properties
            required: Required parameter names

        Example:
            async def get_weather(location: str) -> dict:
                return {"temp": 72, "condition": "sunny"}

            llm.register_function(
                name="get_weather",
                handler=get_weather,
                description="Get weather for a location",
                properties={
                    "location": {"type": "string", "description": "City name"}
                },
                required=["location"]
            )
        """
        pass

    def unregister_function(self, name: str):
        """Unregister function.

        Args:
            name: Function name to unregister
        """
        pass

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process frames.

        Handles:
        - LLMRunFrame: Triggers LLM inference
        - LLMMessagesAppendFrame: Updates context
        - LLMSetToolsFrame: Updates available tools

        Args:
            frame: Frame to process
            direction: Frame direction
        """
        pass
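
The registration methods above can also be used to change the available tools at runtime. A small sketch using only the methods documented here, with the get_weather handler from the docstring example above:

{ .api }
# Offer a tool for part of a session, then remove it again.
llm.register_function(
    name="get_weather",
    handler=get_weather,
    description="Get weather for a location",
    properties={"location": {"type": "string", "description": "City name"}},
    required=["location"]
)

# Later, when the tool should no longer be offered to the model:
llm.unregister_function("get_weather")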

Major LLM Providers

OpenAI

{ .api }
from pipecat.services.openai import OpenAILLMService

class OpenAILLMService(LLMService):
    """OpenAI chat completions service.

    Supports: GPT-4, GPT-4 Turbo, GPT-3.5 Turbo, etc.

    Args:
        api_key: OpenAI API key
        model: Model identifier (e.g., "gpt-4", "gpt-3.5-turbo")
        base_url: Optional base URL for API
        params: Model parameters (temperature, max_tokens, etc.)

    Example:
        llm = OpenAILLMService(
            api_key="sk-...",
            model="gpt-4-turbo",
            params={
                "temperature": 0.7,
                "max_tokens": 1000,
                "top_p": 0.9
            }
        )
        llm.set_context(context)
    """

    def __init__(
        self,
        api_key: str,
        model: str = "gpt-4",
        base_url: Optional[str] = None,
        params: Optional[Dict] = None,
        **kwargs
    ):
        pass

Anthropic

{ .api }
from pipecat.services.anthropic import AnthropicLLMService

class AnthropicLLMService(LLMService):
    """Anthropic Claude service.

    Supports: Claude 3.5 Sonnet, Claude 3 Opus, Claude 3 Haiku, etc.

    Args:
        api_key: Anthropic API key
        model: Model identifier (e.g., "claude-3-5-sonnet-20241022")
        params: Model parameters

    Example:
        llm = AnthropicLLMService(
            api_key="sk-ant-...",
            model="claude-3-5-sonnet-20241022",
            params={
                "temperature": 0.7,
                "max_tokens": 4096
            }
        )
        llm.set_context(context)
    """

    def __init__(
        self,
        api_key: str,
        model: str = "claude-3-5-sonnet-20241022",
        params: Optional[Dict] = None,
        **kwargs
    ):
        pass

Google

{ .api }
from pipecat.services.google import GoogleLLMService

class GoogleLLMService(LLMService):
    """Google Gemini API service.

    Supports: Gemini Pro, Gemini Pro Vision, etc.

    Args:
        api_key: Google AI API key
        model: Model identifier (e.g., "gemini-pro")
        params: Model parameters

    Example:
        llm = GoogleLLMService(
            api_key="AIza...",
            model="gemini-pro",
            params={"temperature": 0.9}
        )
        llm.set_context(context)
    """

    def __init__(
        self,
        api_key: str,
        model: str = "gemini-pro",
        params: Optional[Dict] = None,
        **kwargs
    ):
        pass

Azure OpenAI

{ .api }
from pipecat.services.azure import AzureLLMService

class AzureLLMService(LLMService):
    """Azure OpenAI service.

    Uses Azure's OpenAI deployment.

    Args:
        api_key: Azure API key
        endpoint: Azure endpoint URL
        model: Deployment name
        api_version: Azure API version
        params: Model parameters

    Example:
        llm = AzureLLMService(
            api_key="...",
            endpoint="https://your-resource.openai.azure.com/",
            model="your-deployment-name",
            api_version="2024-02-15-preview",
            params={"temperature": 0.7}
        )
        llm.set_context(context)
    """

    def __init__(
        self,
        api_key: str,
        endpoint: str,
        model: str,
        api_version: str = "2024-02-15-preview",
        params: Optional[Dict] = None,
        **kwargs
    ):
        pass

AWS Bedrock

{ .api }
from pipecat.services.aws import AWSLLMService

class AWSLLMService(LLMService):
    """AWS Bedrock service.

    Supports: Claude, Titan, Llama, Mistral, etc. via Bedrock.

    Args:
        model: Bedrock model ID
        aws_access_key_id: AWS access key
        aws_secret_access_key: AWS secret key
        aws_region: AWS region
        params: Model parameters

    Example:
        llm = AWSLLMService(
            model="anthropic.claude-3-5-sonnet-20241022-v2:0",
            aws_access_key_id="...",
            aws_secret_access_key="...",
            aws_region="us-west-2",
            params={"temperature": 0.7}
        )
        llm.set_context(context)
    """

    def __init__(
        self,
        model: str,
        aws_access_key_id: Optional[str] = None,
        aws_secret_access_key: Optional[str] = None,
        aws_region: str = "us-east-1",
        params: Optional[Dict] = None,
        **kwargs
    ):
        pass

Additional LLM Providers

Groq

{ .api }
from pipecat.services.groq import GroqLLMService

class GroqLLMService(LLMService):
    """Groq LLM service (ultra-fast inference).

    Args:
        api_key: Groq API key
        model: Model identifier (e.g., "mixtral-8x7b-32768")

    Example:
        llm = GroqLLMService(
            api_key="gsk_...",
            model="mixtral-8x7b-32768"
        )
    """
    pass

Ollama

{ .api }
from pipecat.services.ollama import OllamaLLMService

class OllamaLLMService(LLMService):
    """Ollama local model service.

    Run models locally via Ollama.

    Args:
        base_url: Ollama server URL
        model: Model name (e.g., "llama2", "mistral")

    Example:
        llm = OllamaLLMService(
            base_url="http://localhost:11434",
            model="llama2"
        )
    """

    def __init__(
        self,
        base_url: str = "http://localhost:11434",
        model: str = "llama2",
        **kwargs
    ):
        pass

Cerebras

{ .api }
from pipecat.services.cerebras import CerebrasLLMService

class CerebrasLLMService(LLMService):
    """Cerebras LLM service integration.

    Fast inference with Cerebras models for low-latency applications.

    Args:
        api_key: Cerebras API key
        model: Model identifier
        params: Model parameters

    Example:
        llm = CerebrasLLMService(
            api_key="your-api-key",
            model="llama3.1-8b",
            params={"temperature": 0.7}
        )
    """
    pass

DeepSeek

{ .api }
from pipecat.services.deepseek import DeepSeekLLMService

class DeepSeekLLMService(LLMService):
    """DeepSeek LLM service integration.

    High-performance reasoning models from DeepSeek AI.

    Args:
        api_key: DeepSeek API key
        model: Model identifier (e.g., "deepseek-chat", "deepseek-coder")
        base_url: Optional base URL
        params: Model parameters

    Example:
        llm = DeepSeekLLMService(
            api_key="your-api-key",
            model="deepseek-chat",
            params={"temperature": 0.7}
        )
    """
    pass

Fireworks

{ .api }
from pipecat.services.fireworks import FireworksLLMService

class FireworksLLMService(LLMService):
    """Fireworks AI LLM service integration.

    Fast inference for open-source models via Fireworks AI.

    Args:
        api_key: Fireworks API key
        model: Model identifier (e.g., "accounts/fireworks/models/llama-v3p1-8b-instruct")
        base_url: Optional base URL
        params: Model parameters

    Example:
        llm = FireworksLLMService(
            api_key="your-api-key",
            model="accounts/fireworks/models/llama-v3p1-8b-instruct",
            params={"temperature": 0.7, "max_tokens": 1000}
        )
    """
    pass

Gradium

{ .api }
from pipecat.services.gradium import GradiumLLMService

class GradiumLLMService(LLMService):
    """Gradium LLM service integration.

    Gradium platform for LLM inference and fine-tuning.

    Args:
        api_key: Gradium API key
        model: Model identifier
        base_url: Optional base URL
        params: Model parameters

    Example:
        llm = GradiumLLMService(
            api_key="your-api-key",
            model="gradium-model",
            params={"temperature": 0.7}
        )
    """
    pass

NIM (NVIDIA Inference Microservices)

{ .api }
from pipecat.services.nim import NIMLLMService

class NIMLLMService(LLMService):
    """NVIDIA NIM LLM service integration.

    Deploy optimized inference microservices with NVIDIA NIM.

    Args:
        api_key: NIM API key
        model: Model identifier
        base_url: NIM endpoint URL
        params: Model parameters

    Example:
        llm = NIMLLMService(
            api_key="your-api-key",
            model="meta/llama-3.1-8b-instruct",
            base_url="https://your-nim-endpoint",
            params={"temperature": 0.7}
        )
    """
    pass

NVIDIA

{ .api }
from pipecat.services.nvidia import NVIDIALLMService

class NVIDIALLMService(LLMService):
    """NVIDIA LLM service integration.

    Access NVIDIA's LLM models and inference services.

    Args:
        api_key: NVIDIA API key
        model: Model identifier
        base_url: Optional base URL
        params: Model parameters

    Example:
        llm = NVIDIALLMService(
            api_key="your-api-key",
            model="nvidia/llama-3.1-nemotron-70b-instruct",
            params={"temperature": 0.7}
        )
    """
    pass

OpenPipe

{ .api }
from pipecat.services.openpipe import OpenPipeLLMService

class OpenPipeLLMService(LLMService):
    """OpenPipe LLM service integration.

    OpenPipe platform for fine-tuning and deploying custom LLMs.

    Args:
        api_key: OpenPipe API key
        model: Model identifier
        base_url: Optional base URL
        params: Model parameters

    Example:
        llm = OpenPipeLLMService(
            api_key="your-api-key",
            model="openpipe:your-model",
            params={"temperature": 0.7}
        )
    """
    pass

OpenRouter

{ .api }
from pipecat.services.openrouter import OpenRouterLLMService

class OpenRouterLLMService(LLMService):
    """OpenRouter LLM service integration.

    Unified API for accessing multiple LLM providers through OpenRouter.

    Args:
        api_key: OpenRouter API key
        model: Model identifier (e.g., "anthropic/claude-3-opus", "openai/gpt-4")
        base_url: Optional base URL
        params: Model parameters

    Example:
        llm = OpenRouterLLMService(
            api_key="your-api-key",
            model="anthropic/claude-3-opus",
            params={"temperature": 0.7}
        )
    """
    pass

Qwen

{ .api }
from pipecat.services.qwen import QwenLLMService

class QwenLLMService(LLMService):
    """Qwen LLM service integration.

    Alibaba's Qwen (Tongyi Qianwen) large language models.

    Args:
        api_key: Qwen API key
        model: Model identifier (e.g., "qwen-turbo", "qwen-plus", "qwen-max")
        base_url: Optional base URL
        params: Model parameters

    Example:
        llm = QwenLLMService(
            api_key="your-api-key",
            model="qwen-max",
            params={"temperature": 0.7}
        )
    """
    pass

Mistral

{ .api }
from pipecat.services.mistral import MistralLLMService

class MistralLLMService(LLMService):
    """Mistral AI LLM service integration.

    High-performance open-source and proprietary models from Mistral AI,
    accessed through an OpenAI-compatible interface.

    Supported Models:
        - mistral-large-latest: Flagship model for complex tasks
        - mistral-small-latest: Efficient model for simple tasks
        - ministral-8b-latest: Small, fast model
        - codestral-latest: Specialized code generation model

    Args:
        api_key: Mistral API key
        model: Model identifier (default: "mistral-small-latest")
        base_url: API endpoint (default: "https://api.mistral.ai/v1")
        params: Model parameters (temperature, max_tokens, etc.)

    Example:
        llm = MistralLLMService(
            api_key="your-api-key",
            model="mistral-large-latest",
            params={
                "temperature": 0.7,
                "max_tokens": 2000
            }
        )
        llm.set_context(context)
    """

    def __init__(
        self,
        api_key: str,
        model: str = "mistral-small-latest",
        base_url: str = "https://api.mistral.ai/v1",
        params: Optional[Dict] = None,
        **kwargs
    ):
        pass

Learn more: Mistral AI Documentation

Together AI

{ .api }
from pipecat.services.together import TogetherLLMService

class TogetherLLMService(LLMService):
    """Together AI LLM service integration.

    Fast inference for open-source models including Llama, Mixtral, and more
    via Together AI's optimized infrastructure.

    Supported Models:
        - meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
        - meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
        - mistralai/Mixtral-8x7B-Instruct-v0.1
        - And 50+ other open-source models

    Args:
        api_key: Together AI API key
        model: Model identifier (default: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo")
        base_url: API endpoint (default: "https://api.together.xyz/v1")
        params: Model parameters

    Example:
        llm = TogetherLLMService(
            api_key="your-api-key",
            model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
            params={
                "temperature": 0.7,
                "max_tokens": 1000
            }
        )
        llm.set_context(context)
    """

    def __init__(
        self,
        api_key: str,
        model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        base_url: str = "https://api.together.xyz/v1",
        params: Optional[Dict] = None,
        **kwargs
    ):
        pass

Learn more: Together AI Documentation

Perplexity

{ .api }
from pipecat.services.perplexity import PerplexityLLMService

class PerplexityLLMService(LLMService):
    """Perplexity AI LLM service integration.

    Advanced models with real-time web search capabilities and citation support.

    Supported Models:
        - llama-3.1-sonar-small-128k-online
        - llama-3.1-sonar-large-128k-online
        - llama-3.1-sonar-huge-128k-online

    Args:
        api_key: Perplexity API key
        model: Model identifier
        base_url: API endpoint
        params: Model parameters

    Example:
        llm = PerplexityLLMService(
            api_key="your-api-key",
            model="llama-3.1-sonar-large-128k-online",
            params={"temperature": 0.7}
        )
        llm.set_context(context)
    """

    def __init__(
        self,
        api_key: str,
        model: str,
        base_url: Optional[str] = None,
        params: Optional[Dict] = None,
        **kwargs
    ):
        pass

Learn more: Perplexity API Documentation

SambaNova

{ .api }
from pipecat.services.sambanova import SambaNovaLLMService

class SambaNovaLLMService(LLMService):
    """SambaNova LLM service integration.

    Ultra-fast AI inference using SambaNova's specialized hardware.

    Args:
        api_key: SambaNova API key
        model: Model identifier
        base_url: API endpoint
        params: Model parameters

    Example:
        llm = SambaNovaLLMService(
            api_key="your-api-key",
            model="Meta-Llama-3.1-8B-Instruct",
            params={"temperature": 0.7}
        )
        llm.set_context(context)
    """

    def __init__(
        self,
        api_key: str,
        model: str,
        base_url: Optional[str] = None,
        params: Optional[Dict] = None,
        **kwargs
    ):
        pass

Learn more: SambaNova Documentation

Function Calling

Registering Functions

{ .api }
# Define async function
async def get_weather(location: str, units: str = "celsius") -> dict:
    """Get weather for a location."""
    # Implementation
    return {
        "location": location,
        "temperature": 72,
        "units": units,
        "condition": "sunny"
    }

# Register with LLM
llm.register_function(
    name="get_weather",
    handler=get_weather,
    description="Get current weather for a location",
    properties={
        "location": {
            "type": "string",
            "description": "City name or location"
        },
        "units": {
            "type": "string",
            "enum": ["celsius", "fahrenheit"],
            "description": "Temperature units"
        }
    },
    required=["location"]
)

Function Call Flow

{ .api }
# 1. LLM decides to call function
# -> Emits FunctionCallFromLLM with function name and arguments

# 2. Function executes
# -> Returns result

# 3. Result added to context
# -> FunctionCallResultFrame with result

# 4. LLM called again with function result
# -> Generates response using function result

# Monitor function calls
@llm.event_handler("on_function_call_start")
async def handle_function_start(function_name: str):
    print(f"Calling function: {function_name}")

@llm.event_handler("on_function_call_end")
async def handle_function_end(function_name: str, result: Any):
    print(f"Function {function_name} returned: {result}")

Usage Patterns

Basic LLM Integration

{ .api }
from pipecat.services.openai import OpenAILLMService
from pipecat.processors.aggregators.llm_context import LLMContext, LLMContextAggregatorPair
from pipecat.pipeline.pipeline import Pipeline

# Setup context
context = LLMContext(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."}
    ],
    settings={"temperature": 0.7}
)

# Create LLM service
llm = OpenAILLMService(api_key="key", model="gpt-4")
llm.set_context(context)

# Create aggregators
aggregators = LLMContextAggregatorPair(context=context)

# Build pipeline
pipeline = Pipeline([
    aggregators.user,
    llm,
    aggregators.assistant
])

Multi-Provider Setup

{ .api }
# Use different providers for different tasks
from pipecat.services.openai import OpenAILLMService
from pipecat.services.anthropic import AnthropicLLMService

# Fast provider for quick responses
fast_llm = OpenAILLMService(api_key="key", model="gpt-3.5-turbo")

# Powerful provider for complex tasks
powerful_llm = AnthropicLLMService(api_key="key", model="claude-3-opus-20240229")

# Switch based on task complexity
from pipecat.pipeline.service_switcher import ServiceSwitcher, ServiceSwitcherStrategyManual

llm_switcher = ServiceSwitcher(
    services=[fast_llm, powerful_llm],
    strategy=ServiceSwitcherStrategyManual(default_index=0)
)
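
When only a coarse, setup-time choice is needed, a plain conditional can stand in for the switcher. A sketch reusing fast_llm and powerful_llm above together with the Pipeline and aggregator objects from the basic integration example (select_llm and the complexity flag are hypothetical names):

{ .api }
# Hypothetical helper: choose a service when the pipeline is built,
# based on an application-defined complexity flag.
def select_llm(task_complexity: str):
    if task_complexity == "high":
        return powerful_llm  # AnthropicLLMService defined above
    return fast_llm          # OpenAILLMService defined above

llm = select_llm("high")
pipeline = Pipeline([aggregators.user, llm, aggregators.assistant])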

Function Calling Example

{ .api }
from pipecat.services.openai import OpenAILLMService
from pipecat.processors.aggregators.llm_context import LLMContext

# Define functions
async def search_web(query: str) -> dict:
    # Search implementation
    return {"results": [...]}

async def calculate(expression: str) -> float:
    # Calculator implementation (eval is used here for brevity; avoid
    # evaluating untrusted input in production)
    return eval(expression)

# Setup LLM with functions
context = LLMContext(
    messages=[
        {"role": "system", "content": "You are a helpful assistant with web search and calculator tools."}
    ]
)

llm = OpenAILLMService(api_key="key", model="gpt-4")
llm.set_context(context)

# Register functions
llm.register_function(
    name="search_web",
    handler=search_web,
    description="Search the web",
    properties={"query": {"type": "string"}},
    required=["query"]
)

llm.register_function(
    name="calculate",
    handler=calculate,
    description="Calculate mathematical expression",
    properties={"expression": {"type": "string"}},
    required=["expression"]
)

Streaming Responses

{ .api }
# LLM services automatically stream responses as LLMTextFrame

# Monitor streaming
from pipecat.frames.frames import LLMTextFrame
from pipecat.processors.frame_processor import FrameProcessor

class StreamMonitor(FrameProcessor):
    async def process_frame(self, frame, direction):
        await super().process_frame(frame, direction)
        if isinstance(frame, LLMTextFrame):
            print(f"LLM chunk: {frame.text}", end="", flush=True)
        await self.push_frame(frame, direction)

pipeline = Pipeline([
    user_agg,
    llm,
    StreamMonitor(),  # Monitor streaming
    assistant_agg
])

Best Practices

Set Appropriate System Messages

{ .api }
# Good: Clear system message for voice
context = LLMContext(
    messages=[
        {
            "role": "system",
            "content": "You are a helpful voice assistant. Keep responses concise and natural for speech. Avoid using markdown or special formatting."
        }
    ]
)

# Bad: No guidance for voice output
context = LLMContext(messages=[])

Handle Timeouts

{ .api }
# Monitor completion timeouts
from pipecat.frames.frames import TextFrame

@llm.event_handler("on_completion_timeout")
async def handle_timeout():
    print("LLM completion timed out")
    # 'task' is the application's running PipelineTask
    await task.queue_frame(
        TextFrame("I'm sorry, I'm taking too long to respond.")
    )

Use Appropriate Models

{ .api }
# Good: Match model to task
# Quick conversational responses
quick_llm = OpenAILLMService(api_key="key", model="gpt-3.5-turbo")  # Fast, cheap

# Complex reasoning
complex_llm = OpenAILLMService(api_key="key", model="gpt-4")  # Powerful, accurate

# Bad: Always using the most expensive model
llm = OpenAILLMService(api_key="key", model="gpt-4")  # Unnecessary for simple tasks

Function Call Error Handling

{ .api }
async def safe_function(param: str) -> dict:
    """Function with error handling."""
    try:
        # Function logic
        result = await some_operation(param)
        return {"success": True, "result": result}

    except Exception as e:
        # Return error in result
        return {
            "success": False,
            "error": str(e)
        }

llm.register_function("safe_function", safe_function, ...)
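
A complete registration for this handler might look like the sketch below; the parameter schema is illustrative and matches the single param argument used above:

{ .api }
llm.register_function(
    name="safe_function",
    handler=safe_function,
    description="Example operation with structured error reporting",
    properties={
        "param": {"type": "string", "description": "Input value for the operation"}
    },
    required=["param"]
)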

Context Size Management

{ .api }
from pipecat.frames.frames import LLMMessagesAppendFrame
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.frame_processor import FrameProcessor

class ContextSizeManager(FrameProcessor):
    """Manage context size to avoid token limits."""

    def __init__(self, context: LLMContext, max_messages: int = 20):
        super().__init__()
        self._context = context
        self._max = max_messages

    async def process_frame(self, frame, direction):
        await super().process_frame(frame, direction)

        if isinstance(frame, LLMMessagesAppendFrame):
            messages = self._context.get_messages()

            if len(messages) > self._max:
                # Keep system + recent messages
                system = [m for m in messages if m["role"] == "system"]
                recent = [m for m in messages if m["role"] != "system"][-self._max:]
                self._context.messages = system + recent

        await self.push_frame(frame, direction)
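
To take effect, the manager needs to sit where append frames pass through, typically upstream of the LLM service. A sketch reusing the context, llm, and aggregator pair from the basic integration example:

{ .api }
pipeline = Pipeline([
    aggregators.user,
    ContextSizeManager(context, max_messages=20),  # trims context before inference
    llm,
    aggregators.assistant
])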