The official Python SDK for the Deepgram automated speech recognition platform.
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
The official Python SDK for the Deepgram automated speech recognition platform, enabling developers to integrate advanced AI-powered speech-to-text, text-to-speech, and audio intelligence capabilities into their applications. The SDK offers comprehensive functionality including real-time streaming transcription via WebSocket connections, batch processing of pre-recorded audio files, text-to-speech synthesis, conversational AI agents, text intelligence analysis, and complete project management through Deepgram's platform APIs.
pip install deepgram-sdk

from deepgram import DeepgramClient, DeepgramClientOptions

Common imports for specific functionality:
# For speech-to-text
from deepgram import (
ListenRESTClient, ListenWebSocketClient,
ListenRESTOptions, ListenWebSocketOptions
)
# For text-to-speech
from deepgram import (
SpeakRESTClient, SpeakWebSocketClient,
SpeakRESTOptions, SpeakWSOptions
)
# For text analysis
from deepgram import AnalyzeClient, AnalyzeOptions
# For project management
from deepgram import ManageClient
# For conversational AI
from deepgram import AgentWebSocketClient

from deepgram import DeepgramClient, DeepgramClientOptions
import os
# Initialize client with API key
client = DeepgramClient(api_key="your-api-key")
# Alternative: Initialize with environment variables
# Set DEEPGRAM_API_KEY environment variable
client = DeepgramClient()
# Speech-to-text with prerecorded audio
from deepgram import UrlSource, ListenRESTOptions
source = UrlSource("https://example.com/audio.wav")
options = ListenRESTOptions(model="nova-2", language="en-US")
response = client.listen.rest.transcribe_url(source, options)
print(response.results.channels[0].alternatives[0].transcript)
# Text-to-speech
from deepgram import TextSource, SpeakRESTOptions
source = TextSource("Hello, world!")
options = SpeakRESTOptions(model="aura-asteria-en")
response = client.speak.rest.stream(source, options)
# Save audio to file
with open("output.wav", "wb") as f:
    f.write(response.content)

The Deepgram SDK is organized around a main client (DeepgramClient) that provides access to different service routers:
Each router provides both synchronous and asynchronous clients, with REST interfaces for batch processing and WebSocket interfaces for real-time streaming.
# Synchronous access
client.listen.rest # ListenRESTClient
client.listen.websocket # ListenWebSocketClient
client.speak.rest # SpeakRESTClient
client.speak.websocket # SpeakWebSocketClient
client.read # ReadClient/AnalyzeClient
client.manage # ManageClient
client.auth.v("1") # AuthRESTClient
client.selfhosted # SelfHostedClient
client.agent # AgentWebSocketClient
# Asynchronous access
client.listen.asyncrest # AsyncListenRESTClient
client.listen.asyncwebsocket # AsyncListenWebSocketClient
client.speak.asyncrest # AsyncSpeakRESTClient
client.speak.asyncwebsocket # AsyncSpeakWebSocketClient
client.read # AsyncReadClient/AsyncAnalyzeClient
client.asyncmanage # AsyncManageClient
client.asyncauth.v("1") # AsyncAuthRESTClient
client.asyncselfhosted # AsyncSelfHostedClient
client.agent # AsyncAgentWebSocketClient

Comprehensive speech recognition capabilities supporting both batch transcription of prerecorded audio and real-time streaming transcription. Includes advanced features like speaker diarization, punctuation, profanity filtering, keyword detection, and multiple language support.
# REST Client
class ListenRESTClient:
def transcribe_url(self, source, options): ...
def transcribe_file(self, source, options): ...
# WebSocket Client
class ListenWebSocketClient:
def start(self, options): ...
def send(self, data): ...
def finish(self): ...
def close(self): ...
# Options
class ListenRESTOptions:
model: str
language: str
punctuate: bool
diarize: bool
# ... additional options
class ListenWebSocketOptions:
model: str
language: str
encoding: str
sample_rate: int
# ... additional options

High-quality neural text-to-speech synthesis with multiple voice models and real-time streaming capabilities. Supports both REST API for generating complete audio files and WebSocket streaming for real-time audio generation.
# REST Client
class SpeakRESTClient:
def stream(self, source, options): ...
def save(self, filename, source, options): ...
# WebSocket Client
class SpeakWebSocketClient:
def start(self, options): ...
def send(self, message): ...
def close(self): ...
# Options
class SpeakRESTOptions:
model: str
encoding: str
container: str
sample_rate: int
bit_rate: int
class SpeakWSOptions:
model: str
encoding: str
sample_rate: int

Advanced text intelligence capabilities including sentiment analysis, topic detection, intent recognition, and content summarization. Processes text content to extract insights and understanding.
class AnalyzeClient:
def analyze_url(self, source, options): ...
def analyze_text(self, source, options): ...
class AnalyzeOptions:
language: str
topics: bool
intents: bool
sentiment: bool
summarize: bool

Complete account and project management functionality including API key management, usage tracking, team member management, and billing information access.
class ManageClient:
def get_projects(self): ...
def get_project(self, project_id): ...
def get_keys(self, project_id): ...
def create_key(self, project_id, options): ...
def get_usage(self, project_id, options): ...
def get_balances(self, project_id): ...
# ... additional management methods

Real-time conversational AI capabilities enabling voice-based interactions with intelligent agents. Supports function calling, dynamic prompt updates, and bidirectional audio streaming.
class AgentWebSocketClient:
def start(self, options): ...
def send_settings(self, settings): ...
def update_prompt(self, prompt): ...
def inject_message(self, message): ...
def close(self): ...
class SettingsOptions:
agent: dict
listen: dict
speak: dict
think: dict

Utility classes for audio input/output operations including microphone capture and speaker playback, with configurable audio parameters and error handling.
class Microphone:
def __init__(self, **kwargs): ...
def start(self): ...
def finish(self): ...
class Speaker:
def __init__(self, **kwargs): ...
def start(self): ...
def finish(self): ...
# Constants
INPUT_CHANNELS: int = 1
INPUT_RATE: int = 16000
INPUT_CHUNK: int = 8192
OUTPUT_CHANNELS: int = 1
OUTPUT_RATE: int = 24000
OUTPUT_CHUNK: int = 8192

Token management and authentication capabilities for generating temporary JWT tokens from API keys, enabling secure access with configurable time-to-live settings.
class AuthRESTClient:
def grant_token(self, ttl_seconds: int = None) -> GrantTokenResponse: ...
class AsyncAuthRESTClient:
async def grant_token(self, ttl_seconds: int = None) -> GrantTokenResponse: ...
class GrantTokenResponse:
access_token: str
expires_in: int

Support for on-premises and self-hosted Deepgram deployments with custom endpoint configuration and deployment management.
class SelfHostedClient:
def __init__(self, config: DeepgramClientOptions): ...
class AsyncSelfHostedClient:
def __init__(self, config: DeepgramClientOptions): ...
# Backward compatibility aliases
class OnPremClient(SelfHostedClient): ...
class AsyncOnPremClient(AsyncSelfHostedClient): ...

class DeepgramClient:
def __init__(self, api_key: str = "", config: DeepgramClientOptions = None, access_token: str = ""): ...
@property
def listen(self): ...
@property
def speak(self): ...
@property
def read(self): ...
@property
def manage(self): ...
@property
def asyncmanage(self): ...
@property
def agent(self): ...
@property
def auth(self): ...
@property
def asyncauth(self): ...
@property
def selfhosted(self): ...
@property
def asyncselfhosted(self): ...
class DeepgramClientOptions:
api_key: str
access_token: str
url: str
verbose: int
headers: dict
options: dict
# Source types for different input methods
class TextSource:
def __init__(self, text: str): ...
class BufferSource:
def __init__(self, buffer: bytes): ...
class FileSource:
def __init__(self, file: str): ...
class UrlSource:
def __init__(self, url: str): ...
class StreamSource:
def __init__(self, stream): ...
# Base response class
class BaseResponse:
def __init__(self, **kwargs): ...

class DeepgramError(Exception):
"""Base exception for Deepgram SDK errors"""
class DeepgramApiError(DeepgramError):
"""API response errors"""
class DeepgramApiKeyError(DeepgramError):
"""Missing or invalid API key"""
class DeepgramTypeError(DeepgramError):
"""Type validation errors"""
class DeepgramMicrophoneError(Exception):
"""Microphone operation errors"""
class DeepgramSpeakerError(Exception):
"""Speaker operation errors"""