tessl install tessl/pypi-pipecat-ai@0.0.0
An open source framework for building real-time voice and multimodal conversational AI agents, with support for speech-to-text, text-to-speech, LLMs, and multiple transport protocols.
LLM services integrate Large Language Model providers into Pipecat pipelines. The framework provides a universal interface supporting 20+ providers including OpenAI, Anthropic, Google, Azure, AWS, and many others.
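Because every provider implements the same `LLMService` interface, switching providers is typically a one-line change. A minimal sketch (assuming API keys are read from environment variables; both provider classes are documented below):

import os
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.services.anthropic import AnthropicLLMService
from pipecat.services.openai import OpenAILLMService

# The same context object works with any provider.
context = LLMContext(
    messages=[{"role": "system", "content": "You are a helpful assistant."}]
)

# Choose a provider; the rest of the pipeline stays the same.
if os.environ.get("ANTHROPIC_API_KEY"):
    llm = AnthropicLLMService(api_key=os.environ["ANTHROPIC_API_KEY"])
else:
    llm = OpenAILLMService(api_key=os.environ["OPENAI_API_KEY"], model="gpt-4")

llm.set_context(context)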
{ .api }
from pipecat.services.llm_service import LLMService
class LLMService(AIService):
"""Base class for all LLM services.
Provides universal interface for chat completions, function calling,
and context management across different LLM providers.
Key Features:
- Universal context management via LLMContext
- Function/tool calling support
- Streaming responses
- Event handlers for monitoring
- Automatic message format conversion via adapters
Methods:
set_context(context): Set LLM context
register_function(name, handler, ...): Register function for calling
unregister_function(name): Unregister function
process_frame(frame, direction): Process frames
Event Handlers:
on_completion_timeout: LLM completion timeout
on_function_call_start: Function call initiated
on_function_call_end: Function call completed
Example:
from pipecat.services.openai import OpenAILLMService
from pipecat.processors.aggregators.llm_context import LLMContext
context = LLMContext(
messages=[{"role": "system", "content": "You are helpful."}]
)
llm = OpenAILLMService(
api_key="your-key",
model="gpt-4"
)
llm.set_context(context)
"""
def __init__(self, **kwargs):
"""Initialize LLM service.
Args:
**kwargs: Provider-specific configuration
"""
pass
def set_context(self, context: LLMContext):
"""Set LLM context.
Args:
context: LLM context with messages, tools, settings
"""
pass
def register_function(
self,
name: str,
handler: Callable,
description: str = "",
properties: Optional[Dict] = None,
required: Optional[List[str]] = None
):
"""Register function for calling.
Args:
name: Function name
handler: Async function to call
description: Function description
properties: JSON schema properties
required: Required parameter names
Example:
async def get_weather(location: str) -> dict:
return {"temp": 72, "condition": "sunny"}
llm.register_function(
name="get_weather",
handler=get_weather,
description="Get weather for a location",
properties={
"location": {"type": "string", "description": "City name"}
},
required=["location"]
)
"""
pass
def unregister_function(self, name: str):
"""Unregister function.
Args:
name: Function name to unregister
"""
pass
async def process_frame(self, frame: Frame, direction: FrameDirection):
"""Process frames.
Handles:
- LLMRunFrame: Triggers LLM inference
- LLMMessagesAppendFrame: Updates context
- LLMSetToolsFrame: Updates available tools
Args:
frame: Frame to process
direction: Frame direction
"""
pass
{ .api }
from pipecat.services.openai import OpenAILLMService
class OpenAILLMService(LLMService):
"""OpenAI chat completions service.
Supports: GPT-4, GPT-4 Turbo, GPT-3.5 Turbo, etc.
Args:
api_key: OpenAI API key
model: Model identifier (e.g., "gpt-4", "gpt-3.5-turbo")
base_url: Optional base URL for API
params: Model parameters (temperature, max_tokens, etc.)
Example:
llm = OpenAILLMService(
api_key="sk-...",
model="gpt-4-turbo",
params={
"temperature": 0.7,
"max_tokens": 1000,
"top_p": 0.9
}
)
llm.set_context(context)
"""
def __init__(
self,
api_key: str,
model: str = "gpt-4",
base_url: Optional[str] = None,
params: Optional[Dict] = None,
**kwargs
):
pass
{ .api }
from pipecat.services.anthropic import AnthropicLLMService
class AnthropicLLMService(LLMService):
"""Anthropic Claude service.
Supports: Claude 3.5 Sonnet, Claude 3 Opus, Claude 3 Haiku, etc.
Args:
api_key: Anthropic API key
model: Model identifier (e.g., "claude-3-5-sonnet-20241022")
params: Model parameters
Example:
llm = AnthropicLLMService(
api_key="sk-ant-...",
model="claude-3-5-sonnet-20241022",
params={
"temperature": 0.7,
"max_tokens": 4096
}
)
llm.set_context(context)
"""
def __init__(
self,
api_key: str,
model: str = "claude-3-5-sonnet-20241022",
params: Optional[Dict] = None,
**kwargs
):
pass
{ .api }
from pipecat.services.google import GoogleLLMService
class GoogleLLMService(LLMService):
"""Google Gemini API service.
Supports: Gemini Pro, Gemini Pro Vision, etc.
Args:
api_key: Google AI API key
model: Model identifier (e.g., "gemini-pro")
params: Model parameters
Example:
llm = GoogleLLMService(
api_key="AIza...",
model="gemini-pro",
params={"temperature": 0.9}
)
llm.set_context(context)
"""
def __init__(
self,
api_key: str,
model: str = "gemini-pro",
params: Optional[Dict] = None,
**kwargs
):
pass
{ .api }
from pipecat.services.azure import AzureLLMService
class AzureLLMService(LLMService):
"""Azure OpenAI service.
Uses Azure's OpenAI deployment.
Args:
api_key: Azure API key
endpoint: Azure endpoint URL
model: Deployment name
api_version: Azure API version
params: Model parameters
Example:
llm = AzureLLMService(
api_key="...",
endpoint="https://your-resource.openai.azure.com/",
model="your-deployment-name",
api_version="2024-02-15-preview",
params={"temperature": 0.7}
)
llm.set_context(context)
"""
def __init__(
self,
api_key: str,
endpoint: str,
model: str,
api_version: str = "2024-02-15-preview",
params: Optional[Dict] = None,
**kwargs
):
pass
{ .api }
from pipecat.services.aws import AWSLLMService
class AWSLLMService(LLMService):
"""AWS Bedrock service.
Supports: Claude, Titan, Llama, Mistral, etc. via Bedrock.
Args:
model: Bedrock model ID
aws_access_key_id: AWS access key
aws_secret_access_key: AWS secret key
aws_region: AWS region
params: Model parameters
Example:
llm = AWSLLMService(
model="anthropic.claude-3-5-sonnet-20241022-v2:0",
aws_access_key_id="...",
aws_secret_access_key="...",
aws_region="us-west-2",
params={"temperature": 0.7}
)
llm.set_context(context)
"""
def __init__(
self,
model: str,
aws_access_key_id: Optional[str] = None,
aws_secret_access_key: Optional[str] = None,
aws_region: str = "us-east-1",
params: Optional[Dict] = None,
**kwargs
):
pass
{ .api }
from pipecat.services.groq import GroqLLMService
class GroqLLMService(LLMService):
"""Groq LLM service (ultra-fast inference).
Args:
api_key: Groq API key
model: Model identifier (e.g., "mixtral-8x7b-32768")
Example:
llm = GroqLLMService(
api_key="gsk_...",
model="mixtral-8x7b-32768"
)
"""
pass
{ .api }
from pipecat.services.ollama import OllamaLLMService
class OllamaLLMService(LLMService):
"""Ollama local model service.
Run models locally via Ollama.
Args:
base_url: Ollama server URL
model: Model name (e.g., "llama2", "mistral")
Example:
llm = OllamaLLMService(
base_url="http://localhost:11434",
model="llama2"
)
"""
def __init__(
self,
base_url: str = "http://localhost:11434",
model: str = "llama2",
**kwargs
):
pass
{ .api }
from pipecat.services.cerebras import CerebrasLLMService
class CerebrasLLMService(LLMService):
"""Cerebras LLM service integration.
Fast inference with Cerebras models for low-latency applications.
Args:
api_key: Cerebras API key
model: Model identifier
params: Model parameters
Example:
llm = CerebrasLLMService(
api_key="your-api-key",
model="llama3.1-8b",
params={"temperature": 0.7}
)
"""
pass
{ .api }
from pipecat.services.deepseek import DeepSeekLLMService
class DeepSeekLLMService(LLMService):
"""DeepSeek LLM service integration.
High-performance reasoning models from DeepSeek AI.
Args:
api_key: DeepSeek API key
model: Model identifier (e.g., "deepseek-chat", "deepseek-coder")
base_url: Optional base URL
params: Model parameters
Example:
llm = DeepSeekLLMService(
api_key="your-api-key",
model="deepseek-chat",
params={"temperature": 0.7}
)
"""
pass
{ .api }
from pipecat.services.fireworks import FireworksLLMService
class FireworksLLMService(LLMService):
"""Fireworks AI LLM service integration.
Fast inference for open-source models via Fireworks AI.
Args:
api_key: Fireworks API key
model: Model identifier (e.g., "accounts/fireworks/models/llama-v3p1-8b-instruct")
base_url: Optional base URL
params: Model parameters
Example:
llm = FireworksLLMService(
api_key="your-api-key",
model="accounts/fireworks/models/llama-v3p1-8b-instruct",
params={"temperature": 0.7, "max_tokens": 1000}
)
"""
pass
{ .api }
from pipecat.services.gradium import GradiumLLMService
class GradiumLLMService(LLMService):
"""Gradium LLM service integration.
Gradium platform for LLM inference and fine-tuning.
Args:
api_key: Gradium API key
model: Model identifier
base_url: Optional base URL
params: Model parameters
Example:
llm = GradiumLLMService(
api_key="your-api-key",
model="gradium-model",
params={"temperature": 0.7}
)
"""
pass
{ .api }
from pipecat.services.nim import NIMLLMService
class NIMLLMService(LLMService):
"""NVIDIA NIM LLM service integration.
Deploy optimized inference microservices with NVIDIA NIM.
Args:
api_key: NIM API key
model: Model identifier
base_url: NIM endpoint URL
params: Model parameters
Example:
llm = NIMLLMService(
api_key="your-api-key",
model="meta/llama-3.1-8b-instruct",
base_url="https://your-nim-endpoint",
params={"temperature": 0.7}
)
"""
pass
{ .api }
from pipecat.services.nvidia import NVIDIALLMService
class NVIDIALLMService(LLMService):
"""NVIDIA LLM service integration.
Access NVIDIA's LLM models and inference services.
Args:
api_key: NVIDIA API key
model: Model identifier
base_url: Optional base URL
params: Model parameters
Example:
llm = NVIDIALLMService(
api_key="your-api-key",
model="nvidia/llama-3.1-nemotron-70b-instruct",
params={"temperature": 0.7}
)
"""
pass
{ .api }
from pipecat.services.openpipe import OpenPipeLLMService
class OpenPipeLLMService(LLMService):
"""OpenPipe LLM service integration.
OpenPipe platform for fine-tuning and deploying custom LLMs.
Args:
api_key: OpenPipe API key
model: Model identifier
base_url: Optional base URL
params: Model parameters
Example:
llm = OpenPipeLLMService(
api_key="your-api-key",
model="openpipe:your-model",
params={"temperature": 0.7}
)
"""
pass
{ .api }
from pipecat.services.openrouter import OpenRouterLLMService
class OpenRouterLLMService(LLMService):
"""OpenRouter LLM service integration.
Unified API for accessing multiple LLM providers through OpenRouter.
Args:
api_key: OpenRouter API key
model: Model identifier (e.g., "anthropic/claude-3-opus", "openai/gpt-4")
base_url: Optional base URL
params: Model parameters
Example:
llm = OpenRouterLLMService(
api_key="your-api-key",
model="anthropic/claude-3-opus",
params={"temperature": 0.7}
)
"""
pass
{ .api }
from pipecat.services.qwen import QwenLLMService
class QwenLLMService(LLMService):
"""Qwen LLM service integration.
Alibaba's Qwen (Tongyi Qianwen) large language models.
Args:
api_key: Qwen API key
model: Model identifier (e.g., "qwen-turbo", "qwen-plus", "qwen-max")
base_url: Optional base URL
params: Model parameters
Example:
llm = QwenLLMService(
api_key="your-api-key",
model="qwen-max",
params={"temperature": 0.7}
)
"""
pass
{ .api }
from pipecat.services.mistral import MistralLLMService
class MistralLLMService(LLMService):
"""Mistral AI LLM service integration.
High-performance open-source and proprietary models from Mistral AI, accessed through an OpenAI-compatible interface.
Supported Models:
- mistral-large-latest: Flagship model for complex tasks
- mistral-small-latest: Efficient model for simple tasks
- ministral-8b-latest: Small, fast model
- codestral-latest: Specialized code generation model
Args:
api_key: Mistral API key
model: Model identifier (default: "mistral-small-latest")
base_url: API endpoint (default: "https://api.mistral.ai/v1")
params: Model parameters (temperature, max_tokens, etc.)
Example:
llm = MistralLLMService(
api_key="your-api-key",
model="mistral-large-latest",
params={
"temperature": 0.7,
"max_tokens": 2000
}
)
llm.set_context(context)
"""
def __init__(
self,
api_key: str,
model: str = "mistral-small-latest",
base_url: str = "https://api.mistral.ai/v1",
params: Optional[Dict] = None,
**kwargs
):
pass
Learn more: Mistral AI Documentation
{ .api }
from pipecat.services.together import TogetherLLMService
class TogetherLLMService(LLMService):
"""Together AI LLM service integration.
Fast inference for open-source models including Llama, Mixtral, and more
via Together AI's optimized infrastructure.
Supported Models:
- meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
- meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
- mistralai/Mixtral-8x7B-Instruct-v0.1
- And 50+ other open-source models
Args:
api_key: Together AI API key
model: Model identifier (default: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo")
base_url: API endpoint (default: "https://api.together.xyz/v1")
params: Model parameters
Example:
llm = TogetherLLMService(
api_key="your-api-key",
model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
params={
"temperature": 0.7,
"max_tokens": 1000
}
)
llm.set_context(context)
"""
def __init__(
self,
api_key: str,
model: str = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
base_url: str = "https://api.together.xyz/v1",
params: Optional[Dict] = None,
**kwargs
):
pass
Learn more: Together AI Documentation
{ .api }
from pipecat.services.perplexity import PerplexityLLMService
class PerplexityLLMService(LLMService):
"""Perplexity AI LLM service integration.
Advanced models with real-time web search capabilities and citation support.
Supported Models:
- llama-3.1-sonar-small-128k-online
- llama-3.1-sonar-large-128k-online
- llama-3.1-sonar-huge-128k-online
Args:
api_key: Perplexity API key
model: Model identifier
base_url: API endpoint
params: Model parameters
Example:
llm = PerplexityLLMService(
api_key="your-api-key",
model="llama-3.1-sonar-large-128k-online",
params={"temperature": 0.7}
)
llm.set_context(context)
"""
def __init__(
self,
api_key: str,
model: str,
base_url: Optional[str] = None,
params: Optional[Dict] = None,
**kwargs
):
pass
Learn more: Perplexity API Documentation
{ .api }
from pipecat.services.sambanova import SambaNovaLLMService
class SambaNovaLLMService(LLMService):
"""SambaNova LLM service integration.
Ultra-fast AI inference using SambaNova's specialized hardware.
Args:
api_key: SambaNova API key
model: Model identifier
base_url: API endpoint
params: Model parameters
Example:
llm = SambaNovaLLMService(
api_key="your-api-key",
model="Meta-Llama-3.1-8B-Instruct",
params={"temperature": 0.7}
)
llm.set_context(context)
"""
def __init__(
self,
api_key: str,
model: str,
base_url: Optional[str] = None,
params: Optional[Dict] = None,
**kwargs
):
pass
Learn more: SambaNova Documentation
{ .api }
# Define async function
async def get_weather(location: str, units: str = "celsius") -> dict:
"""Get weather for a location."""
# Implementation
return {
"location": location,
"temperature": 72,
"units": units,
"condition": "sunny"
}
# Register with LLM
llm.register_function(
name="get_weather",
handler=get_weather,
description="Get current weather for a location",
properties={
"location": {
"type": "string",
"description": "City name or location"
},
"units": {
"type": "string",
"enum": ["celsius", "fahrenheit"],
"description": "Temperature units"
}
},
required=["location"]
)
{ .api }
# 1. LLM decides to call function
# -> Emits FunctionCallFromLLM with function name and arguments
# 2. Function executes
# -> Returns result
# 3. Result added to context
# -> FunctionCallResultFrame with result
# 4. LLM called again with function result
# -> Generates response using function result
# Monitor function calls
@llm.event_handler("on_function_call_start")
async def handle_function_start(function_name: str):
print(f"Calling function: {function_name}")
@llm.event_handler("on_function_call_end")
async def handle_function_end(function_name: str, result: Any):
print(f"Function {function_name} returned: {result}"){ .api }
{ .api }
from pipecat.services.openai import OpenAILLMService
from pipecat.processors.aggregators.llm_context import LLMContext, LLMContextAggregatorPair
from pipecat.pipeline.pipeline import Pipeline
# Setup context
context = LLMContext(
messages=[
{"role": "system", "content": "You are a helpful assistant."}
],
settings={"temperature": 0.7}
)
# Create LLM service
llm = OpenAILLMService(api_key="key", model="gpt-4")
llm.set_context(context)
# Create aggregators
aggregators = LLMContextAggregatorPair(context=context)
# Build pipeline
pipeline = Pipeline([
aggregators.user,
llm,
aggregators.assistant
])
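Building the pipeline does not run it; inference is triggered by queuing a frame such as `LLMRunFrame` (handled by `process_frame` above) into a pipeline task. A sketch, assuming `PipelineTask`, `PipelineRunner`, and `LLMRunFrame` are importable from `pipecat.pipeline.task`, `pipecat.pipeline.runner`, and `pipecat.frames.frames` (not documented in this section):

import asyncio
from pipecat.frames.frames import LLMRunFrame  # assumed import path
from pipecat.pipeline.runner import PipelineRunner  # assumed import path
from pipecat.pipeline.task import PipelineTask  # assumed import path

async def main():
    task = PipelineTask(pipeline)
    # Trigger one LLM inference over the current context.
    await task.queue_frame(LLMRunFrame())
    await PipelineRunner().run(task)

asyncio.run(main())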
{ .api }
# Use different providers for different tasks
from pipecat.services.openai import OpenAILLMService
from pipecat.services.anthropic import AnthropicLLMService
# Fast provider for quick responses
fast_llm = OpenAILLMService(api_key="key", model="gpt-3.5-turbo")
# Powerful provider for complex tasks
powerful_llm = AnthropicLLMService(api_key="key", model="claude-3-opus-20240229")
# Switch based on task complexity
from pipecat.pipeline.service_switcher import ServiceSwitcher, ServiceSwitcherStrategyManual
llm_switcher = ServiceSwitcher(
services=[fast_llm, powerful_llm],
strategy=ServiceSwitcherStrategyManual(default_index=0)
)
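The switcher takes the place of a single LLM service in the pipeline. A sketch (assuming the switcher can be placed inline like any other processor, and that `aggregators` comes from the basic pipeline example above); with `default_index=0`, `fast_llm` handles requests until the strategy switches:

from pipecat.pipeline.pipeline import Pipeline

pipeline = Pipeline([
    aggregators.user,
    llm_switcher,  # routes to fast_llm (index 0) until switched
    aggregators.assistant,
])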
{ .api }
from pipecat.services.openai import OpenAILLMService
from pipecat.processors.aggregators.llm_context import LLMContext
# Define functions
async def search_web(query: str) -> dict:
# Search implementation
return {"results": [...]}
async def calculate(expression: str) -> float:
# Calculator implementation (note: eval is unsafe for untrusted input; use a proper expression parser in production)
return eval(expression)
# Setup LLM with functions
context = LLMContext(
messages=[
{"role": "system", "content": "You are a helpful assistant with web search and calculator tools."}
]
)
llm = OpenAILLMService(api_key="key", model="gpt-4")
llm.set_context(context)
# Register functions
llm.register_function(
name="search_web",
handler=search_web,
description="Search the web",
properties={"query": {"type": "string"}},
required=["query"]
)
llm.register_function(
name="calculate",
handler=calculate,
description="Calculate mathematical expression",
properties={"expression": {"type": "string"}},
required=["expression"]
)
{ .api }
# LLM services automatically stream responses as LLMTextFrame
# Monitor streaming
from pipecat.frames.frames import LLMTextFrame
from pipecat.processors.frame_processor import FrameProcessor
class StreamMonitor(FrameProcessor):
async def process_frame(self, frame, direction):
if isinstance(frame, LLMTextFrame):
print(f"LLM chunk: {frame.text}", end="", flush=True)
await self.push_frame(frame, direction)
pipeline = Pipeline([
user_agg,
llm,
StreamMonitor(), # Monitor streaming
assistant_agg
])
{ .api }
# Good: Clear system message for voice
context = LLMContext(
messages=[
{
"role": "system",
"content": "You are a helpful voice assistant. Keep responses concise and natural for speech. Avoid using markdown or special formatting."
}
]
)
# Bad: No guidance for voice output
context = LLMContext(messages=[])
{ .api }
# Monitor completion timeouts
@llm.event_handler("on_completion_timeout")
async def handle_timeout():
print("LLM completion timed out")
await task.queue_frame(
TextFrame("I'm sorry, I'm taking too long to respond.")
)
{ .api }
# Good: Match model to task
# Quick conversational responses
quick_llm = OpenAILLMService(model="gpt-3.5-turbo") # Fast, cheap
# Complex reasoning
complex_llm = OpenAILLMService(model="gpt-4") # Powerful, accurate
# Bad: Always using most expensive model
llm = OpenAILLMService(model="gpt-4") # Unnecessary for simple tasks
{ .api }
async def safe_function(param: str) -> dict:
"""Function with error handling."""
try:
# Function logic
result = await some_operation(param)
return {"success": True, "result": result}
except Exception as e:
# Return error in result
return {
"success": False,
"error": str(e)
}
llm.register_function("safe_function", safe_function, ...){ .api }
from pipecat.frames.frames import LLMMessagesAppendFrame
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.frame_processor import FrameProcessor
class ContextSizeManager(FrameProcessor):
"""Manage context size to avoid token limits."""
def __init__(self, context: LLMContext, max_messages: int = 20):
super().__init__()
self._context = context
self._max = max_messages
async def process_frame(self, frame, direction):
if isinstance(frame, LLMMessagesAppendFrame):
messages = self._context.get_messages()
if len(messages) > self._max:
# Keep system + recent messages
system = [m for m in messages if m["role"] == "system"]
recent = [m for m in messages if m["role"] != "system"][-self._max:]
self._context.messages = system + recent
await self.push_frame(frame, direction)
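One way to use the manager is to place it ahead of the LLM service so trimming happens before each inference. A sketch reusing `context`, `llm`, and `aggregators` from the basic pipeline example:

from pipecat.pipeline.pipeline import Pipeline

pipeline = Pipeline([
    aggregators.user,
    ContextSizeManager(context, max_messages=20),  # trim before the LLM sees the context
    llm,
    aggregators.assistant,
])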