docs
tessl install tessl/pypi-pipecat-ai@0.0.0

An open source framework for building real-time voice and multimodal conversational AI agents, with support for speech-to-text, text-to-speech, LLMs, and multiple transport protocols.
{ .api }
from pipecat.pipeline.task import PipelineParams
class PipelineParams:
"""Pipeline task configuration.
Attributes:
allow_interruptions: Enable user interruptions (default: True)
enable_metrics: Collect TTFB metrics (default: False)
enable_usage_metrics: Collect token/character usage (default: False)
report_only_initial_ttfb: Report only first TTFB (default: False)
"""
def __init__(
self,
allow_interruptions: bool = True,
enable_metrics: bool = False,
enable_usage_metrics: bool = False,
report_only_initial_ttfb: bool = False
):
pass
{ .api }
from pipecat.transports.base_transport import TransportParams
class TransportParams:
"""Base transport configuration."""
audio_in_enabled: bool = True
audio_out_enabled: bool = True
video_in_enabled: bool = False
video_out_enabled: bool = False
vad_enabled: bool = False
vad_analyzer: Optional[VADAnalyzer] = None
turn_analyzer: Optional[BaseTurnAnalyzer] = None
audio_in_sample_rate: int = 16000
audio_out_sample_rate: int = 24000
audio_in_channels: int = 1
audio_out_channels: int = 1
audio_out_bitrate: int = 64000
video_in_width: int = 1280
video_in_height: int = 720
video_in_framerate: int = 30
video_out_width: int = 1280
video_out_height: int = 720
video_out_framerate: int = 30
video_out_bitrate: int = 1000000
{ .api }
from pipecat.transports.daily import DailyParams
class DailyParams(TransportParams):
"""Daily.co transport configuration."""
api_url: str = "https://api.daily.co/v1"
api_key: str = ""
audio_in_user_tracks: bool = True
dialin_settings: Optional[DailyDialinSettings] = None
camera_out_enabled: bool = True
microphone_out_enabled: bool = True
transcription_enabled: bool = False
transcription_settings: DailyTranscriptionSettings = DailyTranscriptionSettings()
{ .api }
from pipecat.audio.vad.vad_analyzer import VADParams
class VADParams:
"""Voice Activity Detection parameters."""
threshold: float = 0.5 # Detection threshold (0.0-1.0)
min_speech_duration_ms: float = 250 # Min speech to trigger
min_silence_duration_ms: float = 500 # Min silence to end
start_secs: float = 0.1 # Silence before speech
stop_secs: float = 0.5 # Silence after speech
{ .api }
from pipecat.audio.turn.smart_turn import SmartTurnParams
class SmartTurnParams:
"""Smart turn analyzer parameters."""
stop_secs: float = 3 # Max silence before timeout
pre_speech_ms: float = 500 # Audio before speech starts
max_duration_secs: float = 8 # Max segment duration

Common parameters across LLM providers:
{ .api }
# OpenAI
llm = OpenAILLMService(
api_key="...",
model="gpt-4",
base_url=None, # Optional custom endpoint
params={
"temperature": 0.7, # 0.0-2.0, default 1.0
"max_tokens": 1000, # Max response tokens
"top_p": 0.9, # Nucleus sampling
"frequency_penalty": 0.0, # -2.0 to 2.0
"presence_penalty": 0.0, # -2.0 to 2.0
"seed": None, # Deterministic sampling
}
)
# Anthropic
llm = AnthropicLLMService(
api_key="...",
model="claude-3-5-sonnet-20241022",
params={
"temperature": 0.7,
"max_tokens": 4096,
"top_p": 0.9,
"top_k": None,
}
)
{ .api }
# OpenAI
tts = OpenAITTSService(
api_key="...",
voice="alloy", # alloy, echo, fable, onyx, nova, shimmer
model="tts-1-hd", # tts-1 or tts-1-hd
)
# ElevenLabs
tts = ElevenLabsTTSService(
api_key="...",
voice_id="...",
model="eleven_turbo_v2", # Model selection
params={
"stability": 0.5, # 0.0-1.0
"similarity_boost": 0.75, # 0.0-1.0
}
)
{ .api }
# Deepgram
stt = DeepgramSTTService(
api_key="...",
model="nova-2", # base, nova, nova-2, whisper-cloud
language="en",
interim_results=True,
smart_format=True,
punctuate=True,
vad_events=True,
)
# AssemblyAI
stt = AssemblyAISTTService(
api_key="...",
sample_rate=16000,
word_boost=["custom", "words"],
boost_param="high",
)
{ .api }
from pipecat.turns.user_turn_strategies import UserTurnStrategies
from pipecat.turns.user_start import VADUserTurnStartStrategy, MinWordsUserTurnStartStrategy
from pipecat.turns.user_stop import TranscriptionUserTurnStopStrategy
strategies = UserTurnStrategies(
start=[
VADUserTurnStartStrategy(
enable_interruptions=True,
enable_user_speaking_frames=True
),
MinWordsUserTurnStartStrategy(
min_words=2,
use_interim=True
)
],
stop=[
TranscriptionUserTurnStopStrategy(
timeout=0.7 # Seconds
)
]
)
{ .api }
from pipecat.turns.user_turn_processor import UserTurnProcessor
turn_processor = UserTurnProcessor(
user_turn_strategies=strategies,
user_turn_stop_timeout=5.0, # Auto-stop timeout (seconds)
user_idle_timeout=30.0, # Idle detection timeout (seconds, optional)
)
{ .api }
from pipecat.runner.types import DailyRunnerArguments, WebSocketRunnerArguments
# Daily arguments
daily_args = DailyRunnerArguments(
room_url="https://daily.co/room",
token="...",
handle_sigint=True,
handle_sigterm=True,
pipeline_idle_timeout_secs=300,
body={} # Additional request data
)
# WebSocket arguments
ws_args = WebSocketRunnerArguments(
websocket=websocket,
handle_sigint=False,
handle_sigterm=False,
body={}
)
{ .api }
from pipecat.pipeline.runner import PipelineRunner
runner = PipelineRunner(
name="my-bot",
handle_sigint=True, # Handle Ctrl+C
handle_sigterm=True, # Handle container termination
force_gc=False, # Force garbage collection
loop=None # Event loop (uses current if None)
)

Common environment variables for configuration:
# API Keys
DAILY_API_KEY=your-daily-api-key
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
DEEPGRAM_API_KEY=...
ELEVENLABS_API_KEY=...
# Daily Configuration
DAILY_ROOM_URL=https://daily.co/your-room
DAILY_TOKEN=your-meeting-token
DAILY_API_URL=https://api.daily.co/v1
# LiveKit Configuration
LIVEKIT_URL=wss://your-livekit.com
LIVEKIT_ROOM_NAME=room-name
LIVEKIT_API_KEY=...
LIVEKIT_API_SECRET=...
# Logging
LOG_LEVEL=INFO # DEBUG, INFO, WARNING, ERROR