docs
tessl install tessl/pypi-pipecat-ai@0.0.0

An open source framework for building real-time voice and multimodal conversational AI agents with support for speech-to-text, text-to-speech, LLMs, and multiple transport protocols.
Text-to-Speech services convert text into audio in Pipecat pipelines. The framework supports 20+ TTS providers with uniform interfaces for voice synthesis, streaming, and voice management.
{ .api }
from pipecat.services.tts_service import TTSService
from pipecat.services.ai_service import AIService
from pipecat.frames.frames import TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, AudioRawFrame
from typing import AsyncGenerator
class TTSService(AIService):
"""Base class for all TTS services.
Provides universal interface for text-to-speech synthesis
across different providers.
Key Features:
- Text to audio conversion
- Voice selection and management
- Streaming audio generation
- Event handlers for connection monitoring
- Automatic audio format handling
Methods:
run_tts(text): Synthesize text to audio
set_voice(voice_id): Change voice
set_model(model): Change TTS model
process_frame(frame, direction): Process frames
Event Handlers:
on_connected: Service connected
on_disconnected: Service disconnected
on_connection_error: Connection error occurred
Frames Consumed:
- TextFrame: Generic text
- TTSTextFrame: Text explicitly for TTS
- TTSSpeakFrame: Direct TTS request
- LLMTextFrame: LLM-generated text
Frames Produced:
- TTSAudioRawFrame: Synthesized audio
- TTSStartedFrame: Synthesis started
- TTSStoppedFrame: Synthesis stopped
Example:
from pipecat.services.elevenlabs import ElevenLabsTTSService
tts = ElevenLabsTTSService(
api_key="your-key",
voice_id="voice-id"
)
pipeline = Pipeline([
llm_service,
tts, # Converts text to audio
transport.output()
])
"""
def __init__(self, **kwargs):
"""Initialize TTS service.
Args:
**kwargs: Provider-specific configuration
"""
super().__init__(**kwargs)
self._voice_id = None
self._model = None
async def run_tts(self, text: str) -> AsyncGenerator[AudioRawFrame, None]:
"""Synthesize text to audio.
Args:
text: Text to synthesize
Yields:
AudioRawFrame: Audio chunks
"""
raise NotImplementedError("Subclasses must implement run_tts()")
def set_voice(self, voice_id: str):
"""Set voice for synthesis.
Args:
voice_id: Voice identifier
"""
self._voice_id = voice_id
def set_model(self, model: str):
"""Set TTS model.
Args:
model: Model identifier
"""
self._model = model{ .api }
from pipecat.services.openai import OpenAITTSService
from typing import Optional, Dict
class OpenAITTSService(TTSService):
"""OpenAI TTS service.
Supports: alloy, echo, fable, onyx, nova, shimmer voices.
Args:
api_key: OpenAI API key
voice: Voice identifier (default: "alloy")
model: Model (default: "tts-1")
params: Additional parameters
Example:
tts = OpenAITTSService(
api_key="sk-...",
voice="alloy",
model="tts-1-hd" # HD quality
)
"""
def __init__(
self,
api_key: str,
voice: str = "alloy",
model: str = "tts-1",
params: Optional[Dict] = None,
**kwargs
):
"""Initialize OpenAI TTS service.
Args:
api_key: OpenAI API key
voice: Voice identifier
model: Model name
params: Additional parameters
**kwargs: Additional service arguments
"""
super().__init__(**kwargs)
self.api_key = api_key
self._voice_id = voice
self._model = model
self.params = params or {}{ .api }
from pipecat.services.elevenlabs import ElevenLabsTTSService
from typing import Optional, Dict
class ElevenLabsTTSService(TTSService):
"""ElevenLabs TTS service (high-quality voices).
Args:
api_key: ElevenLabs API key
voice_id: Voice ID from ElevenLabs
model_id: Model ID (default: "eleven_turbo_v2")
params: Voice settings (stability, similarity_boost)
Example:
tts = ElevenLabsTTSService(
api_key="...",
voice_id="21m00Tcm4TlvDq8ikWAM",
model_id="eleven_turbo_v2",
params={
"stability": 0.5,
"similarity_boost": 0.75
}
)
"""
def __init__(
self,
api_key: str,
voice_id: str,
model_id: str = "eleven_turbo_v2",
params: Optional[Dict] = None,
**kwargs
):
"""Initialize ElevenLabs TTS service.
Args:
api_key: ElevenLabs API key
voice_id: Voice ID
model_id: Model ID
params: Voice settings
**kwargs: Additional service arguments
"""
super().__init__(**kwargs)
self.api_key = api_key
self._voice_id = voice_id
self._model = model_id
self.params = params or {}{ .api }
from pipecat.services.cartesia import CartesiaTTSService
class CartesiaTTSService(TTSService):
"""Cartesia TTS service (low-latency).
Args:
api_key: Cartesia API key
voice_id: Voice identifier
model_id: Model ID (default: "sonic-english")
params: Voice parameters
Example:
tts = CartesiaTTSService(
api_key="...",
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091",
model_id="sonic-english"
)
"""
def __init__(
self,
api_key: str,
voice_id: str,
model_id: str = "sonic-english",
params: Optional[Dict] = None,
**kwargs
):
pass{ .api }
from pipecat.services.deepgram import DeepgramTTSService
class DeepgramTTSService(TTSService):
"""Deepgram TTS service.
Args:
api_key: Deepgram API key
voice: Voice name (e.g., "aura-asteria-en")
params: TTS parameters
Example:
tts = DeepgramTTSService(
api_key="...",
voice="aura-asteria-en"
)
"""
def __init__(
self,
api_key: str,
voice: str = "aura-asteria-en",
params: Optional[Dict] = None,
**kwargs
):
pass{ .api }
from pipecat.services.playht import PlayHTTTSService
class PlayHTTTSService(TTSService):
"""PlayHT TTS service.
Args:
api_key: PlayHT API key
user_id: PlayHT user ID
voice_id: Voice identifier
params: Voice parameters
Example:
tts = PlayHTTTSService(
api_key="...",
user_id="...",
voice_id="..."
)
"""
def __init__(
self,
api_key: str,
user_id: str,
voice_id: str,
params: Optional[Dict] = None,
**kwargs
):
pass{ .api }
from pipecat.services.azure import AzureTTSService
class AzureTTSService(TTSService):
"""Azure Speech TTS service.
Args:
api_key: Azure subscription key
region: Azure region
voice: Voice name (e.g., "en-US-JennyNeural")
params: Speech parameters
Example:
tts = AzureTTSService(
api_key="...",
region="eastus",
voice="en-US-JennyNeural"
)
"""
def __init__(
self,
api_key: str,
region: str,
voice: str,
params: Optional[Dict] = None,
**kwargs
):
pass{ .api }
from pipecat.services.google import GoogleTTSService
class GoogleTTSService(TTSService):
"""Google Cloud TTS service.
Args:
credentials: Path to credentials JSON or dict
voice_id: Voice name (e.g., "en-US-Neural2-A")
params: TTS parameters
Example:
tts = GoogleTTSService(
credentials="path/to/credentials.json",
voice_id="en-US-Neural2-A"
)
"""
def __init__(
self,
credentials: Union[str, Dict],
voice_id: str,
params: Optional[Dict] = None,
**kwargs
):
pass{ .api }
from pipecat.services.aws import AWSTTSService
class AWSTTSService(TTSService):
"""AWS Polly TTS service.
Args:
aws_access_key_id: AWS access key
aws_secret_access_key: AWS secret key
aws_region: AWS region
voice_id: Voice ID (e.g., "Joanna")
params: Polly parameters
Example:
tts = AWSTTSService(
aws_access_key_id="...",
aws_secret_access_key="...",
aws_region="us-east-1",
voice_id="Joanna"
)
"""
def __init__(
self,
aws_access_key_id: str,
aws_secret_access_key: str,
aws_region: str,
voice_id: str,
params: Optional[Dict] = None,
**kwargs
):
pass{ .api }
from pipecat.services.fish import FishTTSService
class FishTTSService(TTSService):
"""Fish Audio TTS service integration.
High-quality neural TTS from Fish Audio with voice cloning support.
Args:
api_key: Fish Audio API key
voice_id: Voice identifier
model: Model identifier
params: TTS parameters
Example:
tts = FishTTSService(
api_key="your-api-key",
voice_id="default-voice",
params={"speed": 1.0}
)
"""
pass{ .api }
from pipecat.services.gradium import GradiumTTSService
class GradiumTTSService(TTSService):
"""Gradium TTS service integration.
Text-to-speech via the Gradium platform.
Args:
api_key: Gradium API key
voice_id: Voice identifier
model: Model identifier
params: TTS parameters
Example:
tts = GradiumTTSService(
api_key="your-api-key",
voice_id="gradium-voice",
params={"rate": 1.0}
)
"""
pass{ .api }
from pipecat.services.hume import HumeTTSService
class HumeTTSService(TTSService):
"""Hume AI TTS service integration.
Emotionally intelligent TTS from Hume AI with prosody control.
Args:
api_key: Hume AI API key
voice_id: Voice identifier
model: Model identifier
params: TTS parameters including emotional prosody settings
Example:
tts = HumeTTSService(
api_key="your-api-key",
voice_id="hume-voice",
params={"emotion": "calm", "speed": 1.0}
)
"""
pass{ .api }
from pipecat.services.lmnt import LMNTTTSService
class LMNTTTSService(TTSService):
"""LMNT TTS service integration.
Low-latency, high-quality TTS optimized for real-time applications.
Args:
api_key: LMNT API key
voice_id: Voice identifier
model: Model identifier
params: TTS parameters
Example:
tts = LMNTTTSService(
api_key="your-api-key",
voice_id="lily",
params={"speed": 1.0}
)
"""
pass{ .api }
from pipecat.services.minimax import MinimaxTTSService
class MinimaxTTSService(TTSService):
"""Minimax TTS service integration.
Chinese and multilingual TTS from Minimax AI.
Args:
api_key: Minimax API key
voice_id: Voice identifier
model: Model identifier
params: TTS parameters
Example:
tts = MinimaxTTSService(
api_key="your-api-key",
voice_id="minimax-voice",
params={"speed": 1.0, "language": "zh"}
)
"""
pass{ .api }
from pipecat.services.neuphonic import NeuphonTTSService
class NeuphonTTSService(TTSService):
"""Neuphonic TTS service integration.
Advanced neural TTS with natural prosody and voice control.
Args:
api_key: Neuphonic API key
voice_id: Voice identifier
model: Model identifier
params: TTS parameters
Example:
tts = NeuphonTTSService(
api_key="your-api-key",
voice_id="neuphon-voice",
params={"speed": 1.0}
)
"""
pass{ .api }
from pipecat.services.rime import RimeTTSService
class RimeTTSService(TTSService):
"""Rime TTS service integration.
Real-time TTS optimized for conversational AI applications.
Args:
api_key: Rime API key
voice_id: Voice identifier or model_id
speaker: Speaker identifier
params: TTS parameters
Example:
tts = RimeTTSService(
api_key="your-api-key",
voice_id="rime-voice",
speaker="default",
params={"speed_alpha": 1.0}
)
"""
pass{ .api }
from pipecat.services.asyncai import AsyncAITTSService, AsyncAIHttpTTSService
class AsyncAITTSService(TTSService):
"""AsyncAI TTS service with WebSocket streaming.
High-quality multilingual text-to-speech with low latency via WebSocket connection.
Args:
api_key: AsyncAI API key
voice_id: UUID of the voice to use
model: TTS model (default: "asyncflow_multilingual_v1.0")
sample_rate: Audio sample rate
encoding: Audio encoding format (default: "pcm_s16le")
container: Audio container format (default: "raw")
params: Additional input parameters (language, etc.)
Example:
tts = AsyncAITTSService(
api_key="your-api-key",
voice_id="voice-uuid",
model="asyncflow_multilingual_v1.0",
params=AsyncAITTSService.InputParams(
language=Language.EN
)
)
"""
def __init__(
self,
api_key: str,
voice_id: str,
model: str = "asyncflow_multilingual_v1.0",
sample_rate: Optional[int] = None,
encoding: str = "pcm_s16le",
container: str = "raw",
params: Optional[InputParams] = None,
**kwargs
):
passLearn more: AsyncAI Documentation
{ .api }
from pipecat.services.speechmatics import SpeechmaticsTTSService
class SpeechmaticsTTSService(TTSService):
"""Speechmatics TTS service integration.
Advanced text-to-speech with support for 50+ languages and natural-sounding voices.
Args:
api_key: Speechmatics API key
voice_id: Voice identifier
model: TTS model identifier
params: TTS parameters
Example:
tts = SpeechmaticsTTSService(
api_key="your-api-key",
voice_id="en-US-1",
params={"speed": 1.0}
)
"""
def __init__(
self,
api_key: str,
voice_id: str,
model: Optional[str] = None,
params: Optional[Dict] = None,
**kwargs
):
passLearn more: Speechmatics Documentation
{ .api }
from pipecat.services.inworld import InworldTTSService
class InworldTTSService(TTSService):
"""Inworld TTS service integration.
Character AI TTS with emotional prosody and personality-driven speech.
Designed for gaming and interactive character applications.
Args:
api_key: Inworld API key
character_id: Character identifier
voice: Voice configuration for the character
params: TTS parameters including emotion settings
Example:
tts = InworldTTSService(
api_key="your-api-key",
character_id="character-123",
voice="default",
params={"emotion": "happy"}
)
"""
def __init__(
self,
api_key: str,
character_id: str,
voice: Optional[str] = None,
params: Optional[Dict] = None,
**kwargs
):
passLearn more: Inworld Documentation
{ .api }
from pipecat.services.hathora import HathoraTTSService
class HathoraTTSService(TTSService):
"""Hathora TTS service integration.
TTS service integrated with Hathora's game backend infrastructure
for multiplayer and real-time gaming applications.
Args:
api_key: Hathora API key
voice_id: Voice identifier
params: TTS parameters
Example:
tts = HathoraTTSService(
api_key="your-api-key",
voice_id="hathora-voice",
params={"speed": 1.0}
)
"""
def __init__(
self,
api_key: str,
voice_id: str,
params: Optional[Dict] = None,
**kwargs
):
passLearn more: Hathora Documentation
{ .api }
from pipecat.services.riva import RivaTTSService
class RivaTTSService(TTSService):
"""NVIDIA Riva TTS service integration.
GPU-accelerated text-to-speech using NVIDIA Riva for
ultra-low latency inference. Can be deployed on-premises or cloud.
Args:
server_url: Riva server URL
voice: Voice name (e.g., "English-US.Female-1")
sample_rate: Audio sample rate
params: TTS parameters
Example:
tts = RivaTTSService(
server_url="grpc://localhost:50051",
voice="English-US.Female-1",
sample_rate=24000
)
"""
def __init__(
self,
server_url: str,
voice: str,
sample_rate: int = 24000,
params: Optional[Dict] = None,
**kwargs
):
passLearn more: NVIDIA Riva Documentation
{ .api }
# Additional supported TTS providers
from pipecat.services.sarvam import SarvamTTSService # Indian languages
from pipecat.services.piper import PiperTTSService # Local TTS
from pipecat.services.xtts import XTTSTTSService # XTTS voice cloning
from pipecat.services.camb import CambTTSService  # Camb.ai TTS
from pipecat.services.gradium import GradiumTTSService # Gradium platform
# All follow similar pattern:
tts = ProviderTTSService(
api_key="...",
voice_id="...",
params={...}
)
{ .api }
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.pipeline.pipeline import Pipeline
tts = ElevenLabsTTSService(
api_key="...",
voice_id="..."
)
pipeline = Pipeline([
llm_service, # Generates text
tts, # Converts to audio
transport.output() # Sends audio
])
{ .api }
from pipecat.frames.frames import TTSUpdateSettingsFrame
# Switch voice at runtime
update_frame = TTSUpdateSettingsFrame(
voice_id="new-voice-id"
)
await task.queue_frame(update_frame)
{ .api }
from pipecat.pipeline.service_switcher import ServiceSwitcher
# Multiple TTS services for fallback
tts_switcher = ServiceSwitcher(
services=[
elevenlabs_tts, # Primary
openai_tts, # Fallback 1
azure_tts # Fallback 2
],
strategy=FallbackStrategy()
)
{ .api }
@tts.event_handler("on_connection_error")
async def handle_error(error: Exception):
print(f"TTS connection error: {error}")
    # Switch to fallback or retry
{ .api }
# Good: Match voice to use case
# Friendly assistant
friendly_tts = OpenAITTSService(voice="alloy")
# Professional narrator
professional_tts = ElevenLabsTTSService(voice_id="professional-voice")
# Bad: Using inappropriate voice
kids_app_tts = OpenAITTSService(voice="onyx")  # Deep male voice for kids app
{ .api }
# Good: Use low-latency providers for real-time
from pipecat.services.cartesia import CartesiaTTSService
tts = CartesiaTTSService(api_key="...", voice_id="...") # Ultra-low latency
# Bad: Using high-quality but slow TTS for real-time
tts = ElevenLabsTTSService(model_id="eleven_multilingual_v2") # Slower