Google Cloud Text-to-Speech API client library for converting text to speech with multiple voices and audio formats
—
Long audio synthesis is designed for generating extended audio content that exceeds the limits of standard synthesis operations. It uses Google Cloud's long-running operations (LRO) pattern to handle large-scale text-to-speech generation asynchronously, with output delivered to Google Cloud Storage.
Basic client setup:
from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize

# Blocking (synchronous) client for long audio synthesis
long_client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

# Non-blocking (asyncio) client for long audio synthesis
async_long_client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeAsyncClient()

# Alternative import paths
from google.cloud import texttospeech_v1

# The same client reached through the top-level versioned module
long_client = texttospeech_v1.services.text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

import os
from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize

# Point GOOGLE_APPLICATION_CREDENTIALS at a key file when not relying on
# Application Default Credentials
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/path/to/service-account-key.json'

# The client picks up credentials from the environment
client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

# Resource names used by the long audio API
PROJECT_ID = "your-project-id"
LOCATION = "us-central1" # or other supported location
PARENT = f"projects/{PROJECT_ID}/locations/{LOCATION}"

from google.cloud import texttospeech_v1
from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize

# Build the client
client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

# Assemble the request from individually named parts
synthesis_input = texttospeech_v1.SynthesisInput(
    text="This is a very long text that will be converted to audio. " * 100
)
audio_config = texttospeech_v1.AudioConfig(
    audio_encoding=texttospeech_v1.AudioEncoding.LINEAR16,
    sample_rate_hertz=22050,
)
voice = texttospeech_v1.VoiceSelectionParams(
    language_code="en-US",
    name="en-US-Wavenet-A",
)
request = texttospeech_v1.SynthesizeLongAudioRequest(
    parent="projects/your-project-id/locations/us-central1",
    input=synthesis_input,
    audio_config=audio_config,
    voice=voice,
    output_gcs_uri="gs://your-bucket-name/output-audio.wav",
)

# Kick off the long-running operation
operation = client.synthesize_long_audio(request=request)
print(f"Operation name: {operation.name}")
print("Long audio synthesis started...")

# Block until the operation finishes
result = operation.result()
print("Long audio synthesis completed!")
print(f"Result: {result}")

from google.cloud import texttospeech_v1
# Prepare long SSML content
long_ssml_content = """
<speak>
<p>
<s>Welcome to this long audio demonstration.</s>
<s>This content will be processed as a long-running operation.</s>
</p>
<break time="2s"/>
<p>
<s>Here we have multiple paragraphs with various SSML features.</s>
<s><prosody rate="slow">This part is spoken slowly.</prosody></s>
<s><prosody rate="fast">While this part is much faster.</prosody></s>
</p>
<break time="3s"/>
<p>
<s><emphasis level="strong">This is emphasized text.</emphasis></s>
<s>And this concludes our long audio sample.</s>
</p>
</speak>
"""

# Build the request around the SSML input (note ssml= instead of text=)
ssml_input = texttospeech_v1.SynthesisInput(ssml=long_ssml_content)
mp3_config = texttospeech_v1.AudioConfig(
    audio_encoding=texttospeech_v1.AudioEncoding.MP3,
    speaking_rate=1.0,
    pitch=0.0,
    volume_gain_db=0.0,
)
neural_voice = texttospeech_v1.VoiceSelectionParams(
    language_code="en-US",
    name="en-US-Neural2-A",
)
request = texttospeech_v1.SynthesizeLongAudioRequest(
    parent="projects/your-project-id/locations/us-central1",
    input=ssml_input,
    audio_config=mp3_config,
    voice=neural_voice,
    output_gcs_uri="gs://your-bucket-name/long-ssml-output.mp3",
)
client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()
operation = client.synthesize_long_audio(request=request)

from google.cloud.texttospeech_v1 import (
    SynthesizeLongAudioRequest,
    SynthesisInput,
    AudioConfig,
    VoiceSelectionParams,
    AudioEncoding
)
# Complete long audio request configuration, built from named sub-messages
_input = SynthesisInput(
    text="Long text content to synthesize..."  # or ssml="<speak>...</speak>"
)
_audio = AudioConfig(
    audio_encoding=AudioEncoding.LINEAR16,                          # Audio format
    sample_rate_hertz=24000,                                        # Sample rate
    speaking_rate=1.0,                                              # Speech rate
    pitch=0.0,                                                      # Pitch adjustment
    volume_gain_db=0.0,                                             # Volume gain
    effects_profile_id=["large-home-entertainment-class-device"]    # Audio effects
)
_voice = VoiceSelectionParams(
    language_code="en-US",      # Required: language
    name="en-US-Wavenet-D",     # Specific voice
    ssml_gender=texttospeech_v1.SsmlVoiceGender.FEMALE
)
request = SynthesizeLongAudioRequest(
    parent="projects/your-project-id/locations/us-central1",  # Required: parent resource
    input=_input,
    audio_config=_audio,
    voice=_voice,
    output_gcs_uri="gs://your-bucket-name/path/output.wav"    # Required: GCS output location
)

# A second request demonstrating custom pronunciations on the voice
_pronunciations = texttospeech_v1.CustomPronunciations(
    pronunciations=[
        texttospeech_v1.CustomPronunciationParams(
            phrase="API",
            ipa="ˌeɪ piː ˈaɪ",
            phonetic_encoding=texttospeech_v1.CustomPronunciationParams.PhoneticEncoding.IPA
        )
    ]
)
request_with_pronunciations = SynthesizeLongAudioRequest(
    parent="projects/your-project-id/locations/us-central1",
    input=SynthesisInput(text="Text with custom pronunciations for API and JSON terms."),
    audio_config=AudioConfig(
        audio_encoding=AudioEncoding.MP3,
        sample_rate_hertz=22050
    ),
    voice=VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Neural2-A",
        custom_pronunciations=_pronunciations
    ),
    output_gcs_uri="gs://your-bucket-name/custom-pronunciation-output.mp3"
)

from google.cloud.texttospeech_v1 import SynthesizeLongAudioResponse, SynthesizeLongAudioMetadata
# Response object (returned when operation completes)
# SynthesizeLongAudioResponse is typically empty - the audio is written to GCS

# Metadata object (available during operation)
def process_operation_metadata(operation):
    """Print progress details packed in a long-running operation's metadata.

    NOTE(review): this unpacks a protobuf ``Any``, so it assumes a raw
    ``google.longrunning`` Operation message; an ``api_core`` Operation
    already exposes deserialized metadata — confirm which type callers pass.
    """
    if operation.metadata:
        target = SynthesizeLongAudioMetadata()
        operation.metadata.Unpack(target)
        print(f"Progress: {target.progress_percentage}%")
        print(f"Start time: {target.start_time}")
        if target.last_update_time:
            print(f"Last update: {target.last_update_time}")
    # Hand back the raw (still-packed) metadata for further inspection
    return operation.metadata
# Access operation result
def get_operation_result(operation):
    """Return the result of a completed operation, or None.

    Expects a ``google.api_core.operation.Operation`` (as returned by
    ``synthesize_long_audio``): ``done()`` and ``result()`` are methods there.

    Returns:
        The operation result when it finished successfully, otherwise None.
    """
    if not operation.done():
        print(f"Operation still running: {operation.name}")
        return None
    # Bug fix: api_core Operations report failures via exception(); ``.error``
    # is a field only on the raw longrunning proto and raised AttributeError.
    error = operation.exception()
    if error:
        print(f"Operation failed: {error}")
        return None
    result = operation.result()
    print("Operation completed successfully")
    # Result is typically empty - check GCS for output file
    return result

import time
from google.api_core import operation
from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize

def monitor_long_audio_operation(operation_name: str, check_interval: int = 30):
    """Poll a long audio synthesis operation until it finishes.

    Args:
        operation_name: Fully qualified operation resource name
            ("projects/.../locations/.../operations/...").
        check_interval: Seconds to sleep between status checks.

    Returns:
        True if the operation succeeded, False if it ended with an error.
    """
    client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()
    # get_operation returns a raw google.longrunning Operation message:
    # ``done`` is a bool field here, not a method (calling it was a bug).
    op = client.get_operation(request={"name": operation_name})
    print(f"Monitoring operation: {operation_name}")
    while not op.done:
        if op.metadata:
            try:
                # ``metadata`` is a protobuf Any; unpack into the typed message
                metadata = texttospeech_v1.SynthesizeLongAudioMetadata()
                op.metadata.Unpack(metadata)
                progress = getattr(metadata, 'progress_percentage', 0)
                print(f"Progress: {progress}%")
                if hasattr(metadata, 'start_time') and metadata.start_time:
                    print(f"Started at: {metadata.start_time}")
            except Exception as e:
                print(f"Could not parse metadata: {e}")
        print(f"Operation still running. Checking again in {check_interval} seconds...")
        time.sleep(check_interval)
        # Refresh operation status
        op = client.get_operation(request={"name": operation_name})
    # ``error`` is a google.rpc.Status message field; a protobuf message is
    # always truthy, so the original ``if op.error:`` reported every operation
    # as failed — check the status code instead (0 == OK).
    if op.error.code:
        print(f"Operation failed: {op.error}")
        return False
    print("Operation completed successfully!")
    print(f"Output should be available at the specified GCS URI")
    return True
# Usage
# operation_name = "projects/your-project/locations/us-central1/operations/long-operation-id"
# success = monitor_long_audio_operation(operation_name)

def cancel_long_audio_operation(operation_name: str):
    """Request cancellation of a running long audio synthesis operation.

    Args:
        operation_name: Fully qualified operation resource name.

    Returns:
        True if cancellation succeeded or is still in progress,
        False if the operation completed first or the request failed.
    """
    client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()
    try:
        # Ask the service to cancel the operation
        client.cancel_operation(request={"name": operation_name})
        print(f"Cancellation requested for operation: {operation_name}")
        # Re-fetch to see whether the cancellation took effect.
        # get_operation returns a raw longrunning proto: ``done`` is a bool
        # field (calling it was a bug), and there is no ``cancelled()`` —
        # a cancelled operation finishes with error code 1 (CANCELLED).
        op = client.get_operation(request={"name": operation_name})
        if op.done:
            if op.error.code == 1:
                print("Operation successfully cancelled")
                return True
            print("Operation completed before cancellation")
            return False
        print("Cancellation in progress...")
        return True
    except Exception as e:
        print(f"Failed to cancel operation: {e}")
        return False
# Usage
# cancel_long_audio_operation("projects/your-project/locations/us-central1/operations/op-id")

def list_long_audio_operations(project_id: str, location: str = "us-central1"):
    """List long audio synthesis operations under a project/location.

    Args:
        project_id: Google Cloud project id.
        location: Region to list operations for.

    Returns:
        The pager of operations, or an empty list on failure.
    """
    client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()
    parent = f"projects/{project_id}/locations/{location}"
    try:
        operations = client.list_operations(request={"name": parent})
        print(f"Operations in {parent}:")
        for op in operations:
            print(f"\nOperation: {op.name}")
            # Listed operations are raw longrunning protos: ``done`` is a
            # bool field, not a method (calling it was a bug).
            print(f"Done: {op.done}")
            if op.done:
                # ``error`` is a Status message and always truthy; the
                # status code distinguishes failure (non-zero) from success.
                if op.error.code:
                    print(f"Error: {op.error}")
                else:
                    print("Status: Completed successfully")
            else:
                print("Status: Running")
                # Try to get metadata
                if op.metadata:
                    try:
                        metadata = texttospeech_v1.SynthesizeLongAudioMetadata()
                        op.metadata.Unpack(metadata)
                        progress = getattr(metadata, 'progress_percentage', 0)
                        print(f"Progress: {progress}%")
                    except Exception:
                        # Narrowed from a bare ``except:`` which also
                        # swallowed KeyboardInterrupt/SystemExit
                        print("Progress: Unknown")
        return operations
    except Exception as e:
        print(f"Failed to list operations: {e}")
        return []
# Usage
# operations = list_long_audio_operations("your-project-id")

import os
from google.cloud import storage
from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize
class AudiobookGenerator:
    """Generate audiobooks from long text content via long audio synthesis.

    Wraps the long audio client plus a Cloud Storage client: synthesis
    output lands in the configured GCS bucket and can be downloaded locally.
    """

    def __init__(self, project_id: str, bucket_name: str, location: str = "us-central1"):
        self.project_id = project_id
        self.bucket_name = bucket_name
        self.location = location
        self.parent = f"projects/{project_id}/locations/{location}"
        # Initialize clients
        self.tts_client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()
        self.storage_client = storage.Client()

    def generate_audiobook(self, text_content: str, output_filename: str,
                           voice_name: str = "en-US-Wavenet-A",
                           language_code: str = "en-US"):
        """Start audiobook synthesis.

        Returns:
            A dict with 'operation', 'operation_name' and 'output_uri',
            or None if the GCS bucket could not be set up.
        """
        # Ensure GCS bucket exists (create it on first use)
        try:
            bucket = self.storage_client.bucket(self.bucket_name)
            if not bucket.exists():
                bucket = self.storage_client.create_bucket(self.bucket_name)
                print(f"Created bucket: {self.bucket_name}")
        except Exception as e:
            print(f"Bucket setup error: {e}")
            return None
        # Configure audiobook synthesis
        gcs_uri = f"gs://{self.bucket_name}/{output_filename}"
        request = texttospeech_v1.SynthesizeLongAudioRequest(
            parent=self.parent,
            input=texttospeech_v1.SynthesisInput(text=text_content),
            audio_config=texttospeech_v1.AudioConfig(
                audio_encoding=texttospeech_v1.AudioEncoding.MP3,
                sample_rate_hertz=22050,
                speaking_rate=0.9,  # Slightly slower for audiobooks
                volume_gain_db=2.0  # Boost volume
            ),
            voice=texttospeech_v1.VoiceSelectionParams(
                language_code=language_code,
                name=voice_name
            ),
            output_gcs_uri=gcs_uri
        )
        print(f"Starting audiobook generation...")
        print(f"Output will be saved to: {gcs_uri}")
        # Start synthesis
        operation = self.tts_client.synthesize_long_audio(request=request)
        return {
            'operation': operation,
            'operation_name': operation.name,
            'output_uri': gcs_uri
        }

    def wait_for_audiobook(self, operation, check_interval: int = 60):
        """Block until the audiobook operation completes.

        ``operation`` is the google.api_core Operation returned by
        generate_audiobook. Bug fixes vs. the original:
        - ``done()`` refreshes status itself, so the manual get_operation
          call (which swapped in a raw proto and broke ``done()``) is gone;
        - ``operation.metadata`` is already deserialized by api_core, so no
          ``Any.Unpack`` (the old call always failed into a bare except);
        - failures are read via ``exception()`` — api_core Operations have
          no ``.error`` attribute.
        """
        print("Waiting for audiobook generation to complete...")
        while not operation.done():
            metadata = operation.metadata
            if metadata is not None:
                progress = getattr(metadata, 'progress_percentage', 0)
                print(f"Progress: {progress}%")
            else:
                print("Checking progress...")
            time.sleep(check_interval)
        error = operation.exception()
        if error:
            print(f"Audiobook generation failed: {error}")
            return False
        print("Audiobook generation completed successfully!")
        return True

    def download_audiobook(self, gcs_uri: str, local_filename: str):
        """Download a generated audiobook from GCS to a local file.

        Raises:
            ValueError: if ``gcs_uri`` is not of the form gs://bucket/object.
        """
        # Parse GCS URI
        if not gcs_uri.startswith("gs://"):
            raise ValueError("Invalid GCS URI")
        path_parts = gcs_uri[5:].split("/", 1)
        # Bug fix: a bucket-only URI ("gs://bucket") used to raise IndexError
        if len(path_parts) != 2 or not path_parts[1]:
            raise ValueError("GCS URI must include an object path")
        bucket_name, blob_name = path_parts
        # Download file
        bucket = self.storage_client.bucket(bucket_name)
        blob = bucket.blob(blob_name)
        blob.download_to_filename(local_filename)
        print(f"Audiobook downloaded to: {local_filename}")
        # Get file info
        file_size = os.path.getsize(local_filename)
        print(f"File size: {file_size / (1024*1024):.2f} MB")
        return local_filename
# Usage example
def generate_sample_audiobook():
    """Generate a sample audiobook end to end and return the job info."""
    # Sample long text (could be loaded from file)
    sample_text = """
Chapter 1: Introduction
Welcome to this sample audiobook demonstration. This text will be converted
into high-quality speech using Google Cloud Text-to-Speech long audio synthesis.
The long audio synthesis feature is specifically designed for content like this,
where the text is too long for standard synthesis operations. It processes the
content asynchronously and delivers the results to Google Cloud Storage.
Chapter 2: Features
Long audio synthesis supports all the same features as standard synthesis,
including SSML markup, custom voices, and audio configuration options.
The main difference is that it can handle much larger amounts of text
and processes them as long-running operations.
This makes it ideal for generating audiobooks, processing long documents,
or creating extended audio content for podcasts and presentations.
Chapter 3: Conclusion
Thank you for listening to this sample audiobook. The long audio synthesis
feature provides a powerful way to convert large amounts of text into
natural-sounding speech.
""" * 5 # Repeat to make it longer
    # Kick off generation
    generator = AudiobookGenerator(
        project_id="your-project-id",
        bucket_name="your-audiobook-bucket"
    )
    job = generator.generate_audiobook(
        text_content=sample_text,
        output_filename="sample_audiobook.mp3",
        voice_name="en-US-Wavenet-A"
    )
    if job is None:
        return None
    # Block until done, then fetch the audio locally
    if generator.wait_for_audiobook(job['operation']):
        generator.download_audiobook(
            job['output_uri'],
            "local_audiobook.mp3"
        )
        print("Audiobook generation complete!")
    return job
# Run the example
# audiobook_result = generate_sample_audiobook()

import concurrent.futures
from typing import List, Dict
class BatchDocumentProcessor:
    """Process multiple documents for long audio synthesis in parallel."""

    def __init__(self, project_id: str, bucket_name: str, location: str = "us-central1"):
        self.project_id = project_id
        self.bucket_name = bucket_name
        self.location = location
        self.parent = f"projects/{project_id}/locations/{location}"
        self.client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

    def process_document_batch(self, documents: List[Dict], max_workers: int = 5):
        """Start synthesis for each document dict, fanning out over threads.

        Each document dict needs 'name' and 'content'; optional 'voice'
        and 'audio' sub-dicts override the defaults.

        Returns:
            One result dict per document with 'success', 'operation_name',
            'output_uri' (and 'operation' or 'error').
        """
        def process_single_document(doc_info):
            """Start synthesis for one document; never raises."""
            try:
                doc_name = doc_info['name']
                text_content = doc_info['content']
                voice_config = doc_info.get('voice', {})
                audio_config = doc_info.get('audio', {})
                # Default configurations
                voice_name = voice_config.get('name', 'en-US-Wavenet-A')
                language_code = voice_config.get('language_code', 'en-US')
                audio_encoding = audio_config.get('encoding', texttospeech_v1.AudioEncoding.MP3)
                sample_rate = audio_config.get('sample_rate', 22050)
                # NOTE(review): the output name is always ".mp3" even when a
                # non-MP3 encoding is requested — confirm intended naming.
                output_uri = f"gs://{self.bucket_name}/batch/{doc_name}.mp3"
                request = texttospeech_v1.SynthesizeLongAudioRequest(
                    parent=self.parent,
                    input=texttospeech_v1.SynthesisInput(text=text_content),
                    audio_config=texttospeech_v1.AudioConfig(
                        audio_encoding=audio_encoding,
                        sample_rate_hertz=sample_rate
                    ),
                    voice=texttospeech_v1.VoiceSelectionParams(
                        language_code=language_code,
                        name=voice_name
                    ),
                    output_gcs_uri=output_uri
                )
                # Start synthesis
                operation = self.client.synthesize_long_audio(request=request)
                return {
                    'document': doc_name,
                    'operation_name': operation.name,
                    'output_uri': output_uri,
                    'success': True,
                    'operation': operation
                }
            except Exception as e:
                return {
                    'document': doc_info['name'],
                    'operation_name': None,
                    'output_uri': None,
                    'success': False,
                    'error': str(e)
                }
        # Process documents in parallel
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            results = list(executor.map(process_single_document, documents))
        return results

    def monitor_batch_operations(self, operation_results: List[Dict],
                                 check_interval: int = 30):
        """Poll all successfully started operations until each completes.

        Returns:
            The finished result dicts, each annotated with 'final_status'.
        """
        pending_operations = [r for r in operation_results if r['success']]
        completed_operations = []
        print(f"Monitoring {len(pending_operations)} operations...")
        while pending_operations:
            still_pending = []
            for op_result in pending_operations:
                try:
                    # get_operation returns a raw longrunning proto:
                    # ``done`` is a bool field (calling it was a bug), and
                    # ``error`` is a Status message that is always truthy —
                    # the status code distinguishes failure from success.
                    operation = self.client.get_operation(
                        request={"name": op_result['operation_name']}
                    )
                    if operation.done:
                        if operation.error.code:
                            op_result['final_status'] = 'failed'
                            op_result['error'] = str(operation.error)
                            print(f"❌ {op_result['document']}: Failed")
                        else:
                            op_result['final_status'] = 'completed'
                            print(f"✅ {op_result['document']}: Completed")
                        completed_operations.append(op_result)
                    else:
                        # Still running — report progress if metadata parses
                        if operation.metadata:
                            try:
                                metadata = texttospeech_v1.SynthesizeLongAudioMetadata()
                                operation.metadata.Unpack(metadata)
                                progress = getattr(metadata, 'progress_percentage', 0)
                                print(f"⏳ {op_result['document']}: {progress}%")
                            except Exception:
                                # Narrowed from a bare ``except:``
                                print(f"⏳ {op_result['document']}: In progress...")
                        still_pending.append(op_result)
                except Exception as e:
                    print(f"Error checking {op_result['document']}: {e}")
                    still_pending.append(op_result)
            pending_operations = still_pending
            if pending_operations:
                print(f"\n{len(pending_operations)} operations still running. "
                      f"Checking again in {check_interval} seconds...\n")
                time.sleep(check_interval)
        print(f"\nBatch processing complete!")
        print(f"Completed: {len([op for op in completed_operations if op.get('final_status') == 'completed'])}")
        print(f"Failed: {len([op for op in completed_operations if op.get('final_status') == 'failed'])}")
        return completed_operations
# Usage example
def batch_process_example():
    """Run a small three-document batch through the processor."""
    # Each entry supplies its own voice and audio configuration
    documents = [
        {
            'name': 'document1',
            'content': 'This is the first document content. ' * 100,
            'voice': {'name': 'en-US-Neural2-A', 'language_code': 'en-US'},
            'audio': {'encoding': texttospeech_v1.AudioEncoding.MP3, 'sample_rate': 22050}
        },
        {
            'name': 'document2',
            'content': 'This is the second document content. ' * 100,
            'voice': {'name': 'en-US-Wavenet-D', 'language_code': 'en-US'},
            'audio': {'encoding': texttospeech_v1.AudioEncoding.LINEAR16, 'sample_rate': 24000}
        },
        {
            'name': 'document3',
            'content': 'This is the third document content. ' * 100,
            'voice': {'name': 'en-US-Standard-B', 'language_code': 'en-US'},
            'audio': {'encoding': texttospeech_v1.AudioEncoding.OGG_OPUS, 'sample_rate': 48000}
        }
    ]
    # Start all three synthesis jobs, then block until each finishes
    processor = BatchDocumentProcessor(
        project_id="your-project-id",
        bucket_name="your-batch-bucket"
    )
    started = processor.process_document_batch(documents, max_workers=3)
    return processor.monitor_batch_operations(started)
# Run batch processing
# batch_results = batch_process_example()

from google.api_core import exceptions
import logging
def robust_long_audio_synthesis(text_content: str, output_gcs_uri: str,
                                project_id: str, location: str = "us-central1"):
    """Long audio synthesis with comprehensive error handling.

    Args:
        text_content: Plain text to synthesize (must be non-empty).
        output_gcs_uri: Destination GCS URI ("gs://...").
        project_id: Google Cloud project id.
        location: Region for the request.

    Returns:
        {'success': True, 'operation': ..., 'operation_name': ...} on
        success, else {'success': False, 'error': ..., 'details': ...}.
    """
    client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()
    parent = f"projects/{project_id}/locations/{location}"
    try:
        # Validate inputs
        if not text_content or not text_content.strip():
            raise ValueError("Text content cannot be empty")
        if not output_gcs_uri.startswith("gs://"):
            raise ValueError("Output URI must be a valid GCS URI (gs://...)")
        # Create request
        request = texttospeech_v1.SynthesizeLongAudioRequest(
            parent=parent,
            input=texttospeech_v1.SynthesisInput(text=text_content),
            audio_config=texttospeech_v1.AudioConfig(
                audio_encoding=texttospeech_v1.AudioEncoding.MP3,
                sample_rate_hertz=22050
            ),
            voice=texttospeech_v1.VoiceSelectionParams(
                language_code="en-US",
                name="en-US-Neural2-A"
            ),
            output_gcs_uri=output_gcs_uri
        )
        # Start operation
        operation = client.synthesize_long_audio(request=request)
        return {
            'success': True,
            'operation': operation,
            'operation_name': operation.name
        }
    except ValueError as e:
        # Bug fix: the local validation errors raised above previously fell
        # through to the generic handler and were reported as 'Unexpected
        # error' — handle them as invalid parameters.
        logging.error(f"Invalid request parameters: {e}")
        return {'success': False, 'error': 'Invalid parameters', 'details': str(e)}
    except exceptions.InvalidArgument as e:
        logging.error(f"Invalid request parameters: {e}")
        return {'success': False, 'error': 'Invalid parameters', 'details': str(e)}
    except exceptions.PermissionDenied as e:
        logging.error(f"Permission denied: {e}")
        return {'success': False, 'error': 'Permission denied', 'details': str(e)}
    except exceptions.ResourceExhausted as e:
        logging.error(f"Quota exceeded: {e}")
        return {'success': False, 'error': 'Quota exceeded', 'details': str(e)}
    except exceptions.FailedPrecondition as e:
        logging.error(f"Failed precondition: {e}")
        return {'success': False, 'error': 'Precondition failed', 'details': str(e)}
    except exceptions.NotFound as e:
        logging.error(f"Resource not found: {e}")
        return {'success': False, 'error': 'Resource not found', 'details': str(e)}
    except Exception as e:
        logging.error(f"Unexpected error: {e}")
        return {'success': False, 'error': 'Unexpected error', 'details': str(e)}
# Usage with error handling
result = robust_long_audio_synthesis(
    text_content="Long text content...",
    output_gcs_uri="gs://your-bucket/output.mp3",
    project_id="your-project-id"
)
if result['success']:
    print(f"Operation started: {result['operation_name']}")
else:
    print(f"Error: {result['error']} - {result['details']}")

class LongAudioBestPractices:
    """Best practices for long audio synthesis."""

    @staticmethod
    def validate_text_length(text: str) -> bool:
        """Return True if ``text`` is within the recommended limit; warn otherwise."""
        # Recommended maximum: ~1 million characters
        MAX_CHARS = 1_000_000
        if len(text) > MAX_CHARS:
            print(f"Warning: Text length ({len(text)}) exceeds recommended maximum ({MAX_CHARS})")
            return False
        return True

    @staticmethod
    def optimize_text_for_synthesis(text: str) -> str:
        """Normalize whitespace for synthesis while keeping paragraph breaks.

        Bug fix: the original collapsed ALL whitespace (``\\s+``) first,
        destroying every newline and making the paragraph-break step dead
        code; its punctuation pattern also matched across newlines, which
        would flatten paragraph breaks. Both patterns now stay on one line.
        """
        import re
        # Collapse runs of spaces/tabs only (newlines are preserved)
        text = re.sub(r'[ \t]+', ' ', text)
        # Ensure a single space after sentence punctuation on the same line
        text = re.sub(r'([.!?])[ \t]*([A-Z])', r'\1 \2', text)
        # Normalize 2+ consecutive newlines to exactly one blank line
        text = re.sub(r'\n\s*\n', '\n\n', text)
        return text.strip()

    @staticmethod
    def choose_optimal_voice(content_type: str, language: str = "en-US") -> str:
        """Choose optimal voice based on content type."""
        voice_recommendations = {
            "audiobook": f"{language}-Wavenet-A",      # Clear, pleasant for long listening
            "news": f"{language}-Neural2-C",           # Authoritative
            "educational": f"{language}-Neural2-A",    # Clear, engaging
            "documentation": f"{language}-Standard-A", # Clear, efficient
            "narrative": f"{language}-Wavenet-D"       # Expressive
        }
        return voice_recommendations.get(content_type, f"{language}-Neural2-A")

    @staticmethod
    def create_optimal_audio_config(use_case: str) -> "texttospeech_v1.AudioConfig":
        """Create optimal audio configuration for different use cases.

        The return annotation is quoted so the class can be defined even
        before ``texttospeech_v1`` is imported.
        """
        configs = {
            "audiobook": texttospeech_v1.AudioConfig(
                audio_encoding=texttospeech_v1.AudioEncoding.MP3,
                sample_rate_hertz=22050,
                speaking_rate=0.9,
                volume_gain_db=2.0
            ),
            "podcast": texttospeech_v1.AudioConfig(
                audio_encoding=texttospeech_v1.AudioEncoding.MP3,
                sample_rate_hertz=44100,
                speaking_rate=1.0,
                volume_gain_db=1.0,
                effects_profile_id=["large-home-entertainment-class-device"]
            ),
            "telephony": texttospeech_v1.AudioConfig(
                audio_encoding=texttospeech_v1.AudioEncoding.MULAW,
                sample_rate_hertz=8000,
                speaking_rate=1.1,
                effects_profile_id=["telephony-class-application"]
            ),
            "archive": texttospeech_v1.AudioConfig(
                audio_encoding=texttospeech_v1.AudioEncoding.LINEAR16,
                sample_rate_hertz=48000,
                speaking_rate=1.0
            )
        }
        return configs.get(use_case, configs["audiobook"])
# Apply best practices
def create_optimized_long_audio_request(text_content: str, output_uri: str,
content_type: str = "audiobook"):
"""Create optimized long audio request following best practices."""
# Validate and optimize text
if not LongAudioBestPractices.validate_text_length(text_content):
print("Consider breaking content into smaller chunks")
optimized_text = LongAudioBestPractices.optimize_text_for_synthesis(text_content)
# Choose optimal voice and config
voice_name = LongAudioBestPractices.choose_optimal_voice(content_type)
audio_config = LongAudioBestPractices.create_optimal_audio_config(content_type)
# Create request
request = texttospeech_v1.SynthesizeLongAudioRequest(
parent="projects/your-project-id/locations/us-central1",
input=texttospeech_v1.SynthesisInput(text=optimized_text),
audio_config=audio_config,
voice=texttospeech_v1.VoiceSelectionParams(
language_code="en-US",
name=voice_name
),
output_gcs_uri=output_uri
)
return requestInstall with Tessl CLI
npx tessl i tessl/pypi-google-cloud-texttospeech