The official Python SDK for the Deepgram voice AI platform, covering automated speech recognition and text-to-speech.
—
High-quality neural text-to-speech synthesis with multiple voice models and real-time streaming capabilities. The Speak module supports both REST API for generating complete audio files and WebSocket streaming for real-time audio generation with various voice models, audio formats, and synthesis options.
Synchronous client for generating complete audio files from text input with comprehensive voice and format options.
class SpeakRESTClient:
def stream_memory(
self,
source: FileSource,
options: SpeakRESTOptions = None,
addons: dict = None,
headers: dict = None,
timeout = None,
endpoint: str = "v1/speak",
**kwargs
) -> SpeakRESTResponse:
"""
Generate speech from text input and return in-memory response.
Args:
source: Text/audio source for synthesis (TextSource, BufferSource, StreamSource)
options: Synthesis configuration options
addons: Additional request parameters
headers: Additional HTTP headers
timeout: Request timeout
endpoint: API endpoint override
Returns:
SpeakRESTResponse: Generated audio data with metadata
"""
def stream_raw(
self,
source: FileSource,
options: SpeakRESTOptions = None,
addons: dict = None,
headers: dict = None,
timeout = None,
endpoint: str = "v1/speak",
**kwargs
) -> httpx.Response:
"""
Generate speech and return raw HTTP response.
Args:
source: Text/audio source for synthesis (TextSource, BufferSource, StreamSource)
options: Synthesis configuration options
addons: Additional request parameters
headers: Additional HTTP headers
timeout: Request timeout
endpoint: API endpoint override
Returns:
httpx.Response: Raw HTTP response with audio data
"""
def save(
self,
filename: str,
source: FileSource,
options: SpeakRESTOptions = None,
addons: dict = None,
headers: dict = None,
timeout = None,
endpoint: str = "v1/speak",
**kwargs
) -> SpeakRESTResponse:
"""
Generate speech and save directly to file.
Args:
filename: Output file path
source: Text/audio source for synthesis (TextSource, BufferSource, StreamSource)
options: Synthesis configuration options
addons: Additional request parameters
headers: Additional HTTP headers
timeout: Request timeout
endpoint: API endpoint override
Returns:
SpeakRESTResponse: Response metadata and status
"""
def file(
self,
filename: str,
source: FileSource,
options: SpeakRESTOptions = None,
addons: dict = None,
timeout = None,
endpoint: str = "v1/speak",
**kwargs
) -> SpeakRESTResponse:
"""
Generate speech and save to file (alias for save method).
Args:
filename: Output file path
source: Text/audio source for synthesis (TextSource, BufferSource, StreamSource)
options: Synthesis configuration options
addons: Additional request parameters
timeout: Request timeout
endpoint: API endpoint override
Returns:
SpeakRESTResponse: Response metadata and status
"""
class AsyncSpeakRESTClient:
async def stream_memory(
self,
source: FileSource,
options: SpeakRESTOptions = None,
addons: dict = None,
headers: dict = None,
timeout = None,
endpoint: str = "v1/speak",
**kwargs
) -> SpeakRESTResponse:
"""Async version of stream_memory method"""
async def stream_raw(
self,
source: FileSource,
options: SpeakRESTOptions = None,
addons: dict = None,
headers: dict = None,
timeout = None,
endpoint: str = "v1/speak",
**kwargs
) -> httpx.Response:
"""Async version of stream_raw method"""
async def save(
self,
filename: str,
source: FileSource,
options: SpeakRESTOptions = None,
addons: dict = None,
headers: dict = None,
timeout = None,
endpoint: str = "v1/speak",
**kwargs
) -> SpeakRESTResponse:
"""Async version of save method"""
async def file(
self,
filename: str,
source: FileSource,
options: SpeakRESTOptions = None,
addons: dict = None,
timeout = None,
endpoint: str = "v1/speak",
**kwargs
) -> SpeakRESTResponse:
"""Async version of file method"""Real-time streaming text-to-speech client supporting incremental text input and real-time audio output.
class SpeakWebSocketClient:
def __init__(self, config: DeepgramClientOptions, microphone: Microphone = None): ...
def start(
self,
options: SpeakWSOptions = None,
addons: dict = None,
headers: dict = None,
members: dict = None,
**kwargs
) -> bool:
"""
Start WebSocket connection for streaming TTS.
Args:
options: WebSocket configuration options
addons: Additional request parameters
headers: Additional HTTP headers
members: Member configuration
Returns:
bool: True if connection started successfully
"""
def on(self, event: SpeakWebSocketEvents, handler: Callable) -> None:
"""
Register event handler for WebSocket events.
Args:
event: WebSocket event type
handler: Callable to handle the event
"""
def send_text(self, text_input: str) -> bool:
"""
Send text for speech synthesis.
Args:
text_input: Text to convert to speech
Returns:
bool: True if text sent successfully
"""
def send(self, data: Union[str, bytes]) -> bool:
"""
Send text data (alias for send_text).
Args:
data: Text or bytes to send
Returns:
bool: True if data sent successfully
"""
def send_raw(self, msg: str) -> bool:
"""
Send raw WebSocket message.
Args:
msg: Raw message to send
Returns:
bool: True if message sent successfully
"""
def send_control(
self,
msg_type: Union[SpeakWebSocketMessage, str],
data: str = ""
) -> bool:
"""
Send control message.
Args:
msg_type: Message type constant
data: Optional data payload
Returns:
bool: True if control message sent successfully
"""
def flush(self) -> bool:
"""
Flush current synthesis buffer.
Returns:
bool: True if flush successful
"""
def clear(self) -> bool:
"""
Clear synthesis buffer.
Returns:
bool: True if clear successful
"""
def finish(self) -> bool:
"""
Finish WebSocket connection.
Returns:
bool: True if finish successful
"""
def wait_for_complete(self) -> None:
"""
Wait for synthesis completion.
"""
class AsyncSpeakWebSocketClient:
def __init__(self, config: DeepgramClientOptions, microphone: Microphone = None): ...
async def start(...) -> bool: ...
def on(self, event: SpeakWebSocketEvents, handler: Callable) -> None: ... # Not async
async def send_text(self, text_input: str) -> bool: ...
async def send(self, data: Union[str, bytes]) -> bool: ...
async def send_raw(self, msg: str) -> bool: ...
async def send_control(...) -> bool: ...
async def flush(self) -> bool: ...
async def clear(self) -> bool: ...
async def finish(self) -> bool: ...
async def wait_for_complete(self) -> None: ...
# Alternative client names
class SpeakWSClient(SpeakWebSocketClient): ...
class AsyncSpeakWSClient(AsyncSpeakWebSocketClient): ...

Access text-to-speech clients through the main client's speak router.
class SpeakRouter:
@property
def rest(self) -> SpeakRESTClient: ...
@property
def asyncrest(self) -> AsyncSpeakRESTClient: ...
@property
def websocket(self) -> SpeakWebSocketClient: ...
@property
def asyncwebsocket(self) -> AsyncSpeakWebSocketClient: ...

class SpeakRESTOptions:
def __init__(self, **kwargs): ...
# Voice model selection
model: str = "aura-asteria-en" # Voice model name
# Audio format settings
encoding: str = "linear16" # Audio encoding format
container: str = "wav" # Audio container format
sample_rate: int = 24000 # Sample rate in Hz
bit_rate: int = None # Bit rate for compressed formats
# Additional options
extra: dict = None # Additional synthesis options
# Legacy alias
class SpeakOptions(SpeakRESTOptions): ...

class SpeakWSOptions:
def __init__(self, **kwargs): ...
# Voice model selection
model: str = "aura-asteria-en" # Voice model name
# Audio format settings (required for WebSocket)
encoding: str = "linear16" # Audio encoding format
sample_rate: int = 24000 # Sample rate in Hz
container: str = None # Audio container (optional for streaming)
# Additional options
extra: dict = None # Additional synthesis options

Event constants and message types for WebSocket text-to-speech operations.
class SpeakWebSocketEvents:
"""WebSocket event constants for TTS operations"""
OPEN: str = "Open"
METADATA: str = "Metadata"
AUDIO: str = "Audio"
FLUSHED: str = "Flushed"
CLEARED: str = "Cleared"
CLOSE: str = "Close"
ERROR: str = "Error"
WARNING: str = "Warning"
UNHANDLED: str = "Unhandled"
class SpeakWebSocketMessage:
"""WebSocket message type constants"""
SPEAK: str = "Speak"
FLUSH: str = "Flush"
CLEAR: str = "Clear"
CLOSE: str = "Close"Input sources for text data in various formats.
class SpeakSource:
"""Base class for text-to-speech sources"""
class TextSource(SpeakSource):
def __init__(self, text: str):
"""
Text from string.
Args:
text: Text content to synthesize
"""
class BufferSource(SpeakSource):
def __init__(self, buffer: bytes):
"""
Text from byte buffer.
Args:
buffer: Text content as bytes
"""
class StreamSource(SpeakSource):
def __init__(self, stream):
"""
Text from stream object.
Args:
stream: File-like stream object
"""
class FileSource(SpeakSource):
def __init__(self, file: str):
"""
Text from local file.
Args:
file: Path to local text file
"""
# Alternative source names
class SpeakRestSource(SpeakSource): ...
class SpeakRESTSource(SpeakSource): ...

class SpeakRESTResponse:
"""REST text-to-speech response containing generated audio"""
content: bytes # Generated audio data
headers: dict # Response headers with metadata
def stream_to_file(self, filename: str) -> None:
"""
Save audio content to file.
Args:
filename: Output file path
"""
# Legacy alias
class SpeakResponse(SpeakRESTResponse): ...

class SpeakWSMetadataResponse:
"""WebSocket metadata response"""
type: str = "Metadata"
request_id: str
model_name: str
model_uuid: str
class FlushedResponse:
"""Buffer flush confirmation"""
type: str = "Flushed"
class ClearedResponse:
"""Buffer clear confirmation"""
type: str = "Cleared"
class WarningResponse:
"""Synthesis warning"""
type: str = "Warning"
message: str
# Common WebSocket responses are inherited from common module:
# OpenResponse, CloseResponse, ErrorResponse, UnhandledResponse

from deepgram import DeepgramClient, TextSource, SpeakRESTOptions
client = DeepgramClient(api_key="your-api-key")

# Generate speech from text
source = TextSource("Hello, world! This is a test of the Deepgram text-to-speech API.")
options = SpeakRESTOptions(
    model="aura-asteria-en",
    encoding="linear16",
    container="wav",
    sample_rate=24000
)
# SpeakRESTClient exposes stream_memory (not `stream`) for in-memory synthesis
response = client.speak.rest.stream_memory(source, options)

# Save to file
with open("output.wav", "wb") as f:
    f.write(response.content)

# Or use convenience method
response.stream_to_file("output.wav")

from deepgram import DeepgramClient, TextSource, SpeakRESTOptions
client = DeepgramClient(api_key="your-api-key")

# Different voice models
models = [
    "aura-asteria-en",   # English, female
    "aura-luna-en",      # English, female
    "aura-stella-en",    # English, female
    "aura-athena-en",    # English, female
    "aura-hera-en",      # English, female
    "aura-orion-en",     # English, male
    "aura-arcas-en",     # English, male
    "aura-perseus-en",   # English, male
    "aura-angus-en",     # English, male
    "aura-orpheus-en",   # English, male
]
source = TextSource("This is a test with different voice models.")
for model in models:
    options = SpeakRESTOptions(model=model)
    # stream_memory is the documented SpeakRESTClient method for in-memory synthesis
    response = client.speak.rest.stream_memory(source, options)
    response.stream_to_file(f"output_{model}.wav")

from deepgram import DeepgramClient, TextSource, SpeakRESTOptions
client = DeepgramClient(api_key="your-api-key")
source = TextSource("Testing different audio formats.")

# WAV format (uncompressed)
wav_options = SpeakRESTOptions(
    model="aura-asteria-en",
    encoding="linear16",
    container="wav",
    sample_rate=24000
)

# MP3 format (compressed)
mp3_options = SpeakRESTOptions(
    model="aura-asteria-en",
    encoding="mp3",
    container="mp3",
    sample_rate=22050,
    bit_rate=128000
)

# FLAC format (lossless compression)
flac_options = SpeakRESTOptions(
    model="aura-asteria-en",
    encoding="flac",
    container="flac",
    sample_rate=24000
)

# Generate in different formats via stream_memory (SpeakRESTClient has no `stream` method)
wav_response = client.speak.rest.stream_memory(source, wav_options)
mp3_response = client.speak.rest.stream_memory(source, mp3_options)
flac_response = client.speak.rest.stream_memory(source, flac_options)

wav_response.stream_to_file("output.wav")
mp3_response.stream_to_file("output.mp3")
flac_response.stream_to_file("output.flac")

from deepgram import DeepgramClient, SpeakWSOptions, SpeakWebSocketEvents
import threading
import queue
client = DeepgramClient(api_key="your-api-key")
audio_queue = queue.Queue()
def on_open(self, open, **kwargs):
print("TTS connection opened")
def on_audio_data(self, data, **kwargs):
# Received audio chunk
audio_queue.put(data)
def on_close(self, close, **kwargs):
print("TTS connection closed")
def on_error(self, error, **kwargs):
print(f"TTS error: {error}")
# Configure WebSocket options
options = SpeakWSOptions(
model="aura-asteria-en",
encoding="linear16",
sample_rate=24000
)
# Start connection
dg_connection = client.speak.websocket.v("1")
dg_connection.on(SpeakWebSocketEvents.Open, on_open)
dg_connection.on(SpeakWebSocketEvents.AudioData, on_audio_data)
dg_connection.on(SpeakWebSocketEvents.Close, on_close)
dg_connection.on(SpeakWebSocketEvents.Error, on_error)
if dg_connection.start(options):
# Send text incrementally
dg_connection.send("Hello, this is streaming text-to-speech. ")
dg_connection.send("I can send text in chunks and receive audio in real-time. ")
dg_connection.send("This is very useful for interactive applications.")
# Flush to ensure all text is processed
dg_connection.flush()
# Close connection
dg_connection.close()
# Process received audio
audio_data = b""
while not audio_queue.empty():
audio_data += audio_queue.get()
# Save streamed audio
with open("streamed_output.wav", "wb") as f:
    f.write(audio_data)

import asyncio
from deepgram import DeepgramClient, TextSource, SpeakRESTOptions

async def async_tts_example():
    client = DeepgramClient(api_key="your-api-key")
    source = TextSource("This is an async text-to-speech example.")
    options = SpeakRESTOptions(
        model="aura-asteria-en",
        encoding="linear16",
        container="wav"
    )
    # AsyncSpeakRESTClient exposes stream_memory (there is no `synthesize` method)
    response = await client.speak.asyncrest.stream_memory(source, options)
    with open("async_output.wav", "wb") as f:
        f.write(response.content)
    print("Async TTS completed")

# Run async example
asyncio.run(async_tts_example())

from deepgram import DeepgramClient, DeepgramApiError, TextSource, SpeakRESTOptions
client = DeepgramClient(api_key="your-api-key")
try:
source = TextSource("Text to synthesize")
options = SpeakRESTOptions(
model="invalid-model", # This will cause an error
encoding="linear16"
)
response = client.speak.rest.stream(source, options)
except DeepgramApiError as e:
print(f"API Error: {e}")
except Exception as e:
print(f"Unexpected error: {e}")Install with Tessl CLI
npx tessl i tessl/pypi-deepgram-sdk