The official Python SDK for the Deepgram automated speech recognition platform.
npx @tessl/cli install tessl/pypi-deepgram-sdk@4.8.0

The official Python SDK for the Deepgram automated speech recognition platform, enabling developers to integrate advanced AI-powered speech-to-text, text-to-speech, and audio intelligence capabilities into their applications. The SDK offers comprehensive functionality including real-time streaming transcription via WebSocket connections, batch processing of pre-recorded audio files, text-to-speech synthesis, conversational AI agents, text intelligence analysis, and complete project management through Deepgram's platform APIs.
pip install deepgram-sdk

from deepgram import DeepgramClient, DeepgramClientOptions

Common imports for specific functionality:
# For speech-to-text
from deepgram import (
ListenRESTClient, ListenWebSocketClient,
ListenRESTOptions, ListenWebSocketOptions
)
# For text-to-speech
from deepgram import (
SpeakRESTClient, SpeakWebSocketClient,
SpeakRESTOptions, SpeakWSOptions
)
# For text analysis
from deepgram import AnalyzeClient, AnalyzeOptions
# For project management
from deepgram import ManageClient
# For conversational AI
from deepgram import AgentWebSocketClient

from deepgram import DeepgramClient, DeepgramClientOptions
import os
# Initialize client with API key
client = DeepgramClient(api_key="your-api-key")
# Alternative: Initialize with environment variables
# Set DEEPGRAM_API_KEY environment variable
client = DeepgramClient()
# Speech-to-text with prerecorded audio
from deepgram import UrlSource, ListenRESTOptions
source = UrlSource("https://example.com/audio.wav")
options = ListenRESTOptions(model="nova-2", language="en-US")
response = client.listen.rest.transcribe_url(source, options)
print(response.results.channels[0].alternatives[0].transcript)
# Text-to-speech
from deepgram import TextSource, SpeakRESTOptions
source = TextSource("Hello, world!")
options = SpeakRESTOptions(model="aura-asteria-en")
response = client.speak.rest.stream(source, options)
# Save audio to file
with open("output.wav", "wb") as f:
    f.write(response.content)

The Deepgram SDK is organized around a main client (DeepgramClient) that provides access to different service routers.
Each router provides both synchronous and asynchronous clients, with REST interfaces for batch processing and WebSocket interfaces for real-time streaming.
# Synchronous access
client.listen.rest # ListenRESTClient
client.listen.websocket # ListenWebSocketClient
client.speak.rest # SpeakRESTClient
client.speak.websocket # SpeakWebSocketClient
client.read # ReadClient/AnalyzeClient
client.manage # ManageClient
client.auth.v("1") # AuthRESTClient
client.selfhosted # SelfHostedClient
client.agent # AgentWebSocketClient
# Asynchronous access
client.listen.asyncrest # AsyncListenRESTClient
client.listen.asyncwebsocket # AsyncListenWebSocketClient
client.speak.asyncrest # AsyncSpeakRESTClient
client.speak.asyncwebsocket # AsyncSpeakWebSocketClient
client.read # AsyncReadClient/AsyncAnalyzeClient
client.asyncmanage # AsyncManageClient
client.asyncauth.v("1") # AsyncAuthRESTClient
client.asyncselfhosted # AsyncSelfHostedClient
client.agent # AsyncAgentWebSocketClient

Comprehensive speech recognition capabilities supporting both batch transcription of prerecorded audio and real-time streaming transcription. Includes advanced features like speaker diarization, punctuation, profanity filtering, keyword detection, and multiple language support.
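As a sketch of the real-time side, the following streams raw audio into the WebSocket client. The on() event registration and the LiveTranscriptionEvents enum are assumptions not listed in the interface summary below, so treat the handler wiring as illustrative rather than authoritative.

# Live transcription sketch (hedged): feed raw PCM audio to the WebSocket client
from deepgram import DeepgramClient, ListenWebSocketOptions, LiveTranscriptionEvents

client = DeepgramClient()  # reads DEEPGRAM_API_KEY from the environment

def on_transcript(self, result, **kwargs):
    # Print each transcript segment as it arrives
    print(result.channel.alternatives[0].transcript)

connection = client.listen.websocket  # ListenWebSocketClient
connection.on(LiveTranscriptionEvents.Transcript, on_transcript)  # assumed event API

options = ListenWebSocketOptions(
    model="nova-2",
    language="en-US",
    encoding="linear16",
    sample_rate=16000,
)
connection.start(options)

# Send audio in small chunks, e.g. from a raw PCM file or a microphone
with open("audio.raw", "rb") as audio:
    while chunk := audio.read(8192):
        connection.send(chunk)

connection.finish()  # signal end of audio and tear down the stream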
# REST Client
class ListenRESTClient:
def transcribe_url(self, source, options): ...
def transcribe_file(self, source, options): ...
# WebSocket Client
class ListenWebSocketClient:
def start(self, options): ...
def send(self, data): ...
def finish(self): ...
def close(self): ...
# Options
class ListenRESTOptions:
model: str
language: str
punctuate: bool
diarize: bool
# ... additional options
class ListenWebSocketOptions:
model: str
language: str
encoding: str
sample_rate: int
# ... additional options

High-quality neural text-to-speech synthesis with multiple voice models and real-time streaming capabilities. Supports both a REST API for generating complete audio files and WebSocket streaming for real-time audio generation.
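For example, speech can be written straight to disk with the documented save() method; this is a minimal sketch, with the voice model name ("aura-asteria-en") taken from the quick-start example above.

# Synthesize text and save the audio to a file via the REST client
from deepgram import DeepgramClient, TextSource, SpeakRESTOptions

client = DeepgramClient()

source = TextSource("Welcome to Deepgram text-to-speech.")
options = SpeakRESTOptions(model="aura-asteria-en")

# save() writes the synthesized audio directly to the given filename
client.speak.rest.save("welcome.mp3", source, options)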
# REST Client
class SpeakRESTClient:
def stream(self, source, options): ...
def save(self, filename, source, options): ...
# WebSocket Client
class SpeakWebSocketClient:
def start(self, options): ...
def send(self, message): ...
def close(self): ...
# Options
class SpeakRESTOptions:
model: str
encoding: str
container: str
sample_rate: int
bit_rate: int
class SpeakWSOptions:
model: str
encoding: str
sample_rate: int

Advanced text intelligence capabilities including sentiment analysis, topic detection, intent recognition, and content summarization. Processes text content to extract insights and understanding.
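A minimal sketch of text analysis, assuming client.read exposes the AnalyzeClient methods listed below directly (depending on SDK version, a versioned sub-client may need to be selected first).

# Run sentiment, topic, intent, and summarization analysis over a piece of text
from deepgram import DeepgramClient, TextSource, AnalyzeOptions

client = DeepgramClient()

source = TextSource("The new release fixed our latency issues and the team is thrilled.")
options = AnalyzeOptions(
    language="en",
    sentiment=True,
    topics=True,
    intents=True,
    summarize=True,
)

response = client.read.analyze_text(source, options)
print(response)  # result structure depends on the requested features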
class AnalyzeClient:
def analyze_url(self, source, options): ...
def analyze_text(self, source, options): ...
class AnalyzeOptions:
language: str
topics: bool
intents: bool
sentiment: bool
summarize: bool

Complete account and project management functionality including API key management, usage tracking, team member management, and billing information access.
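A sketch of typical management calls, assuming the response objects expose the project fields used by the platform API (e.g. a projects list with project_id attributes).

# List projects, then inspect API keys and balances for the first one
from deepgram import DeepgramClient

client = DeepgramClient()

projects = client.manage.get_projects()
project_id = projects.projects[0].project_id  # assumed response shape

keys = client.manage.get_keys(project_id)
balances = client.manage.get_balances(project_id)
print(keys, balances)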
class ManageClient:
def get_projects(self): ...
def get_project(self, project_id): ...
def get_keys(self, project_id): ...
def create_key(self, project_id, options): ...
def get_usage(self, project_id, options): ...
def get_balances(self, project_id): ...
# ... additional management methods

Real-time conversational AI capabilities enabling voice-based interactions with intelligent agents. Supports function calling, dynamic prompt updates, and bidirectional audio streaming.
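A hedged sketch of an agent session; the settings payloads below (provider names, model identifiers) are illustrative placeholders, not values defined by this spec.

# Open a conversational agent session and steer it while it is live
from deepgram import DeepgramClient, SettingsOptions

client = DeepgramClient()
agent = client.agent  # AgentWebSocketClient

settings = SettingsOptions(
    agent={"language": "en"},            # placeholder agent config
    listen={"model": "nova-2"},          # speech-to-text side
    speak={"model": "aura-asteria-en"},  # text-to-speech side
    think={"provider": "open_ai", "model": "gpt-4o-mini"},  # placeholder LLM config
)

agent.start(settings)

# Adjust behavior mid-conversation
agent.update_prompt("You are a concise, friendly support assistant.")
agent.inject_message("Greet the caller and ask how you can help.")

agent.close()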
class AgentWebSocketClient:
def start(self, options): ...
def send_settings(self, settings): ...
def update_prompt(self, prompt): ...
def inject_message(self, message): ...
def close(self): ...
class SettingsOptions:
agent: dict
listen: dict
speak: dict
think: dict

Utility classes for audio input/output operations including microphone capture and speaker playback, with configurable audio parameters and error handling.
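A sketch that pipes microphone audio into live transcription; the push_callback keyword is an assumption (the constructor below only advertises **kwargs), and the sample rate matches the INPUT_RATE default listed below.

# Capture microphone audio and forward each chunk to the live transcription socket
from deepgram import DeepgramClient, ListenWebSocketOptions, Microphone

client = DeepgramClient()
connection = client.listen.websocket

options = ListenWebSocketOptions(
    model="nova-2",
    encoding="linear16",
    sample_rate=16000,  # matches INPUT_RATE below
)
connection.start(options)

# Every captured chunk is pushed straight into the WebSocket connection
microphone = Microphone(push_callback=connection.send)  # assumed keyword
microphone.start()

input("Press Enter to stop recording...\n")

microphone.finish()
connection.finish()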
class Microphone:
def __init__(self, **kwargs): ...
def start(self): ...
def finish(self): ...
class Speaker:
def __init__(self, **kwargs): ...
def start(self): ...
def finish(self): ...
# Constants
INPUT_CHANNELS: int = 1
INPUT_RATE: int = 16000
INPUT_CHUNK: int = 8192
OUTPUT_CHANNELS: int = 1
OUTPUT_RATE: int = 24000
OUTPUT_CHUNK: int = 8192

Token management and authentication capabilities for generating temporary JWT tokens from API keys, enabling secure access with configurable time-to-live settings.
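For example, a backend can mint a short-lived token from its long-lived API key and hand it to a client session; this follows the grant_token interface below and the access_token parameter of the DeepgramClient constructor shown later.

# Exchange a long-lived API key for a temporary access token
from deepgram import DeepgramClient

admin = DeepgramClient(api_key="your-api-key")
token = admin.auth.v("1").grant_token(ttl_seconds=60)

print(token.access_token, token.expires_in)

# The short-lived token can be used in place of the API key
scoped = DeepgramClient(access_token=token.access_token)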
class AuthRESTClient:
def grant_token(self, ttl_seconds: int = None) -> GrantTokenResponse: ...
class AsyncAuthRESTClient:
async def grant_token(self, ttl_seconds: int = None) -> GrantTokenResponse: ...
class GrantTokenResponse:
access_token: str
expires_in: int

Support for on-premises and self-hosted Deepgram deployments with custom endpoint configuration and deployment management.
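A minimal sketch of targeting a self-hosted deployment, assuming the DeepgramClientOptions fields listed later are accepted as constructor keywords and that the deployment is reachable at a local address.

# Point the SDK at a self-hosted Deepgram deployment by overriding the base URL
from deepgram import DeepgramClient, DeepgramClientOptions

config = DeepgramClientOptions(url="http://localhost:8080")  # hypothetical on-prem address

client = DeepgramClient(api_key="your-api-key", config=config)

# Subsequent calls (e.g. client.listen.rest.transcribe_url(...)) are sent to
# the self-hosted endpoint instead of Deepgram's hosted API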
class SelfHostedClient:
def __init__(self, config: DeepgramClientOptions): ...
class AsyncSelfHostedClient:
def __init__(self, config: DeepgramClientOptions): ...
# Backward compatibility aliases
class OnPremClient(SelfHostedClient): ...
class AsyncOnPremClient(AsyncSelfHostedClient): ...

class DeepgramClient:
def __init__(self, api_key: str = "", config: DeepgramClientOptions = None, access_token: str = ""): ...
@property
def listen(self): ...
@property
def speak(self): ...
@property
def read(self): ...
@property
def manage(self): ...
@property
def asyncmanage(self): ...
@property
def agent(self): ...
@property
def auth(self): ...
@property
def asyncauth(self): ...
@property
def selfhosted(self): ...
@property
def asyncselfhosted(self): ...
class DeepgramClientOptions:
api_key: str
access_token: str
url: str
verbose: int
headers: dict
options: dict
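As an illustration of client configuration, the options above can tune logging and transport behavior; the keepalive flag is an assumption drawn from Deepgram's documentation rather than from this spec.

# Configure SDK logging verbosity and WebSocket keepalive behavior
import logging
from deepgram import DeepgramClient, DeepgramClientOptions

config = DeepgramClientOptions(
    verbose=logging.DEBUG,           # SDK log level (an int, per the field above)
    options={"keepalive": "true"},   # assumed option: keep WebSocket connections open
)

client = DeepgramClient(api_key="your-api-key", config=config)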
# Source types for different input methods
class TextSource:
def __init__(self, text: str): ...
class BufferSource:
def __init__(self, buffer: bytes): ...
class FileSource:
def __init__(self, file: str): ...
class UrlSource:
def __init__(self, url: str): ...
class StreamSource:
def __init__(self, stream): ...
# Base response class
class BaseResponse:
def __init__(self, **kwargs): ...

class DeepgramError(Exception):
"""Base exception for Deepgram SDK errors"""
class DeepgramApiError(DeepgramError):
"""API response errors"""
class DeepgramApiKeyError(DeepgramError):
"""Missing or invalid API key"""
class DeepgramTypeError(DeepgramError):
"""Type validation errors"""
class DeepgramMicrophoneError(Exception):
"""Microphone operation errors"""
class DeepgramSpeakerError(Exception):
"""Speaker operation errors"""