Tencent Cloud Machine Translation (TMT) SDK for Python providing comprehensive text, file, image, and speech translation capabilities
Audio translation combining speech recognition and translation for Chinese-English bidirectional processing. Supports both streaming and batch audio processing with multiple audio format compatibility.
Recognizes speech in audio files and translates the recognized text to the target language. Supports real-time streaming and batch processing modes.
def SpeechTranslate(self, request: models.SpeechTranslateRequest) -> models.SpeechTranslateResponse:
"""
Translate speech audio to text in target language.
Args:
request: SpeechTranslateRequest with audio data and parameters
Returns:
SpeechTranslateResponse with translated text result
Raises:
TencentCloudSDKException: For various error conditions
"""Usage Example (Single Audio File):
import base64
from tencentcloud.common import credential
from tencentcloud.tmt.v20180321.tmt_client import TmtClient
from tencentcloud.tmt.v20180321 import models

# Build the API client from account credentials.
client = TmtClient(credential.Credential("SecretId", "SecretKey"), "ap-beijing")

# Read the audio file and base64-encode it for transport.
with open("speech.wav", "rb") as audio_file:
    encoded_audio = base64.b64encode(audio_file.read()).decode()

# Assemble the one-shot speech translation request (zh -> en, single chunk).
req = models.SpeechTranslateRequest()
for field, value in {
    "SessionUuid": "unique-session-id",
    "Source": "zh",        # Chinese input
    "Target": "en",        # English output
    "AudioFormat": 1,      # PCM format
    "Data": encoded_audio,
    "Seq": 0,              # sequence number of the first (and only) fragment
    "IsEnd": 1,            # single file, mark as end
    "ProjectId": 0,
}.items():
    setattr(req, field, value)

# Call the service and report the result.
resp = client.SpeechTranslate(req)
print(f"Session: {resp.SessionUuid}")
print(f"Translation: {resp.Source} -> {resp.Target}")
print(f"Original: {resp.SourceText}")
print(f"Translated: {resp.TargetText}")
print(f"Recognition status: {resp.RecognizeStatus}")Usage Example (Streaming Audio):
def stream_audio_translation(client, audio_chunks, session_uuid):
    """Feed segmented audio to SpeechTranslate and collect the translations.

    Args:
        client: TmtClient instance used to issue the API calls.
        audio_chunks: List of raw audio byte chunks (200-500ms each).
        session_uuid: Unique identifier shared by every chunk of the session.

    Returns:
        List of non-empty translated text fragments, in arrival order.
    """
    translations = []
    final_seq = len(audio_chunks) - 1
    for seq, payload in enumerate(audio_chunks):
        request = models.SpeechTranslateRequest()
        request.SessionUuid = session_uuid
        request.Source = "en"
        request.Target = "zh"
        request.AudioFormat = 1  # PCM only for streaming
        request.Data = base64.b64encode(payload).decode()
        request.Seq = seq
        # Flag the last chunk so the service knows the session is complete.
        request.IsEnd = 1 if seq == final_seq else 0
        request.ProjectId = 0
        try:
            response = client.SpeechTranslate(request)
            if response.TargetText:
                translations.append(response.TargetText)
                print(f"Chunk {seq}: {response.SourceText} -> {response.TargetText}")
        except Exception as exc:
            # Best-effort streaming: report the failed chunk and keep going.
            print(f"Error processing chunk {seq}: {exc}")
    return translations
# Example usage
session_id = "streaming-session-001"
# audio_chunks would be your segmented audio data
# results = stream_audio_translation(client, audio_chunks, session_id)

class SpeechTranslateRequest:
"""
Request parameters for speech translation.
Attributes:
SessionUuid (str): Unique session identifier for tracking
Source (str): Source language code (zh, en)
Target (str): Target language code (zh, en)
AudioFormat (int): Audio format (1: PCM, 2: MP3, 3: SPEEX)
Data (str): Base64 encoded audio data
Seq (int): Sequence number for streaming (starts from 0)
IsEnd (int): End flag (0: more chunks, 1: final chunk)
ProjectId (int): Project ID (default: 0)
"""class SpeechTranslateResponse:
"""
Response from speech translation.
Attributes:
SessionUuid (str): Session identifier from request
RecognizeStatus (int): Speech recognition status (1=processing, 0=complete)
SourceText (str): Recognized original text
TargetText (str): Translated text result
Seq (int): Audio fragment sequence number
Source (str): Source language
Target (str): Target language
VadSeq (int): Voice activity detection sequence number
RequestId (str): Unique request identifier
"""PCM (Format ID: 1)
MP3 (Format ID: 2)
SPEEX (Format ID: 3)
Speech translation currently supports Chinese-English bidirectional translation only.
Common error scenarios for speech translation:
Example error handling:
def safe_speech_translate(client, request):
    """Perform speech translation, mapping known SDK errors to hints.

    Args:
        client: TmtClient instance used to issue the API call.
        request: Populated SpeechTranslateRequest.

    Returns:
        The translated text on success, or None when the SDK raises
        TencentCloudSDKException (a human-readable hint is printed instead).
    """
    # Known error codes mapped to actionable advice for the caller.
    known_errors = {
        "UNSUPPORTEDOPERATION_AUDIODURATIONEXCEED": "Audio file too long, split into smaller chunks",
        "UNSUPPORTEDOPERATION_UNSUPPORTEDLANGUAGE": "Language pair not supported, use zh<->en only",
        "FAILEDOPERATION_REQUESTAILABERR": "Audio processing failed, check audio quality",
    }
    try:
        return client.SpeechTranslate(request).TargetText
    except TencentCloudSDKException as err:
        hint = known_errors.get(err.code)
        # f-string only evaluated when the code is unrecognized, so err.message
        # is accessed exactly as in the original else-branch.
        print(hint if hint is not None else f"Speech translation error: {err.code} - {err.message}")
        return None
# Usage
result = safe_speech_translate(client, req)
if result:
print(f"Translation: {result}")Install with Tessl CLI
npx tessl i tessl/pypi-tencentcloud-sdk-python-tmt@3.0.1