# Long Audio Synthesis

## Overview

Long audio synthesis is designed for generating extended audio content that exceeds the limits of standard synthesis operations. It uses Google Cloud's long-running operations (LRO) pattern to handle large-scale text-to-speech generation asynchronously, with output delivered to Google Cloud Storage.

**Key Features:**

- Supports very large text inputs (up to several hours of audio)
- Asynchronous processing with operation monitoring
- Direct output to Google Cloud Storage
- Progress tracking and metadata
- Suitable for audiobooks, long documents, and batch processing
## Client Setup
14
15
### Long Audio Synthesis Clients
16
17
```api { .api }
18
from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize
19
20
# Synchronous long audio client
21
long_client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()
22
23
# Asynchronous long audio client
24
async_long_client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeAsyncClient()
25
26
# Alternative import paths
27
from google.cloud import texttospeech_v1
28
29
# Through main module
30
long_client = texttospeech_v1.services.text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()
31
```
32
33
### Authentication and Project Setup
34
35
```api { .api }
36
import os
37
from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize
38
39
# Set up authentication (if not using default credentials)
40
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/path/to/service-account-key.json'
41
42
# Initialize with explicit project
43
client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()
44
45
# Project and location information
46
PROJECT_ID = "your-project-id"
47
LOCATION = "us-central1" # or other supported location
48
PARENT = f"projects/{PROJECT_ID}/locations/{LOCATION}"
49
```
50
51
## Core Long Audio Operations
52
53
### Basic Long Audio Synthesis
54
55
```api { .api }
56
from google.cloud import texttospeech_v1
57
from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize
58
59
# Initialize client
60
client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()
61
62
# Create long audio synthesis request
63
request = texttospeech_v1.SynthesizeLongAudioRequest(
64
parent="projects/your-project-id/locations/us-central1",
65
input=texttospeech_v1.SynthesisInput(
66
text="This is a very long text that will be converted to audio. " * 100
67
),
68
audio_config=texttospeech_v1.AudioConfig(
69
audio_encoding=texttospeech_v1.AudioEncoding.LINEAR16,
70
sample_rate_hertz=22050
71
),
72
voice=texttospeech_v1.VoiceSelectionParams(
73
language_code="en-US",
74
name="en-US-Wavenet-A"
75
),
76
output_gcs_uri="gs://your-bucket-name/output-audio.wav"
77
)
78
79
# Start long-running operation
80
operation = client.synthesize_long_audio(request=request)
81
82
print(f"Operation name: {operation.name}")
83
print("Long audio synthesis started...")
84
85
# Wait for completion
86
result = operation.result() # Blocks until complete
87
88
print("Long audio synthesis completed!")
89
print(f"Result: {result}")
90
```
91
92
### SSML Long Audio Synthesis
93
94
```api { .api }
95
from google.cloud import texttospeech_v1
96
97
# Prepare long SSML content
98
long_ssml_content = """
99
<speak>
100
<p>
101
<s>Welcome to this long audio demonstration.</s>
102
<s>This content will be processed as a long-running operation.</s>
103
</p>
104
105
<break time="2s"/>
106
107
<p>
108
<s>Here we have multiple paragraphs with various SSML features.</s>
109
<s><prosody rate="slow">This part is spoken slowly.</prosody></s>
110
<s><prosody rate="fast">While this part is much faster.</prosody></s>
111
</p>
112
113
<break time="3s"/>
114
115
<p>
116
<s><emphasis level="strong">This is emphasized text.</emphasis></s>
117
<s>And this concludes our long audio sample.</s>
118
</p>
119
</speak>
120
"""
121
122
# Create request with SSML
123
request = texttospeech_v1.SynthesizeLongAudioRequest(
124
parent="projects/your-project-id/locations/us-central1",
125
input=texttospeech_v1.SynthesisInput(ssml=long_ssml_content),
126
audio_config=texttospeech_v1.AudioConfig(
127
audio_encoding=texttospeech_v1.AudioEncoding.MP3,
128
speaking_rate=1.0,
129
pitch=0.0,
130
volume_gain_db=0.0
131
),
132
voice=texttospeech_v1.VoiceSelectionParams(
133
language_code="en-US",
134
name="en-US-Neural2-A"
135
),
136
output_gcs_uri="gs://your-bucket-name/long-ssml-output.mp3"
137
)
138
139
client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()
140
operation = client.synthesize_long_audio(request=request)
141
```
142
143
## Request and Response Types
144
145
### SynthesizeLongAudioRequest
146
147
```api { .api }
148
from google.cloud.texttospeech_v1 import (
149
SynthesizeLongAudioRequest,
150
SynthesisInput,
151
AudioConfig,
152
VoiceSelectionParams,
153
AudioEncoding
154
)
155
156
# Complete long audio request configuration
157
request = SynthesizeLongAudioRequest(
158
parent="projects/your-project-id/locations/us-central1", # Required: parent resource
159
160
input=SynthesisInput(
161
text="Long text content to synthesize..." # or ssml="<speak>...</speak>"
162
),
163
164
audio_config=AudioConfig(
165
audio_encoding=AudioEncoding.LINEAR16, # Audio format
166
sample_rate_hertz=24000, # Sample rate
167
speaking_rate=1.0, # Speech rate
168
pitch=0.0, # Pitch adjustment
169
volume_gain_db=0.0, # Volume gain
170
effects_profile_id=["large-home-entertainment-class-device"] # Audio effects
171
),
172
173
voice=VoiceSelectionParams(
174
language_code="en-US", # Required: language
175
name="en-US-Wavenet-D", # Specific voice
176
ssml_gender=texttospeech_v1.SsmlVoiceGender.FEMALE
177
),
178
179
output_gcs_uri="gs://your-bucket-name/path/output.wav" # Required: GCS output location
180
)
181
182
# Request with custom pronunciations
183
request_with_pronunciations = SynthesizeLongAudioRequest(
184
parent="projects/your-project-id/locations/us-central1",
185
input=SynthesisInput(text="Text with custom pronunciations for API and JSON terms."),
186
audio_config=AudioConfig(
187
audio_encoding=AudioEncoding.MP3,
188
sample_rate_hertz=22050
189
),
190
voice=VoiceSelectionParams(
191
language_code="en-US",
192
name="en-US-Neural2-A",
193
custom_pronunciations=texttospeech_v1.CustomPronunciations(
194
pronunciations=[
195
texttospeech_v1.CustomPronunciationParams(
196
phrase="API",
197
ipa="ˌeɪ piː ˈaɪ",
198
phonetic_encoding=texttospeech_v1.CustomPronunciationParams.PhoneticEncoding.IPA
199
)
200
]
201
)
202
),
203
output_gcs_uri="gs://your-bucket-name/custom-pronunciation-output.mp3"
204
)
205
```
206
207
### SynthesizeLongAudioResponse and Metadata
208
209
```api { .api }
210
from google.cloud.texttospeech_v1 import SynthesizeLongAudioResponse, SynthesizeLongAudioMetadata
211
212
# Response object (returned when operation completes)
213
# SynthesizeLongAudioResponse is typically empty - the audio is written to GCS
214
215
# Metadata object (available during operation)
216
def process_operation_metadata(operation):
    """Print progress information from a long-running synthesis operation.

    ``operation`` is the ``google.api_core.operation.Operation`` returned by
    ``synthesize_long_audio``.  Its ``metadata`` property is *already*
    deserialized into ``SynthesizeLongAudioMetadata`` by api_core, so the
    previous ``Any.Unpack`` call was unnecessary and would fail.

    Returns the metadata message, or None when no metadata is available yet.
    """
    metadata = operation.metadata
    if metadata is None:
        return None

    print(f"Progress: {metadata.progress_percentage}%")
    print(f"Start time: {metadata.start_time}")

    if metadata.last_update_time:
        print(f"Last update: {metadata.last_update_time}")

    return metadata
231
232
# Access operation result
233
def get_operation_result(operation):
    """Return the result of a completed operation, or None.

    ``operation`` is a ``google.api_core.operation.Operation`` future.
    The previous version read ``operation.error``, which does not exist on
    the api_core future; the failure (if any) is obtained via
    ``operation.exception()``.
    """
    if not operation.done():
        print(f"Operation still running: {operation.name}")
        return None

    error = operation.exception()
    if error is not None:
        print(f"Operation failed: {error}")
        return None

    result = operation.result()
    print("Operation completed successfully")
    # Result is typically empty - check GCS for the output file
    return result
248
```
249
250
## Operation Management
251
252
### Monitoring Long-Running Operations
253
254
```api { .api }
255
import time
256
from google.api_core import operation
257
from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize
258
259
def monitor_long_audio_operation(operation_name: str, check_interval: int = 30) -> bool:
    """Poll a long-running synthesis operation until it finishes.

    Returns True when the operation completed successfully, False when it
    finished with an error.

    Note: ``get_operation`` returns the raw ``google.longrunning`` Operation
    *proto*, so ``done`` is a bool field (not a method) and ``error`` must be
    tested with ``HasField`` -- a protobuf message field is always truthy.
    """
    client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

    op = client.get_operation(request={"name": operation_name})
    print(f"Monitoring operation: {operation_name}")

    while not op.done:  # proto bool field, not a method
        if op.metadata:
            try:
                # op.metadata is a packed protobuf Any.
                # NOTE(review): if SynthesizeLongAudioMetadata is a proto-plus
                # class, Unpack may need the underlying pb type -- verify.
                metadata = texttospeech_v1.SynthesizeLongAudioMetadata()
                op.metadata.Unpack(metadata)

                progress = getattr(metadata, 'progress_percentage', 0)
                print(f"Progress: {progress}%")

                if getattr(metadata, 'start_time', None):
                    print(f"Started at: {metadata.start_time}")
            except Exception as e:
                print(f"Could not parse metadata: {e}")

        print(f"Operation still running. Checking again in {check_interval} seconds...")
        time.sleep(check_interval)

        # Refresh operation status from the server.
        op = client.get_operation(request={"name": operation_name})

    # Operation completed: message fields are always truthy, so use HasField.
    if op.HasField("error"):
        print(f"Operation failed: {op.error}")
        return False

    print("Operation completed successfully!")
    print("Output should be available at the specified GCS URI")
    return True
299
300
# Usage
301
# operation_name = "projects/your-project/locations/us-central1/operations/long-operation-id"
302
# success = monitor_long_audio_operation(operation_name)
303
```
304
305
### Cancelling Operations
306
307
```api { .api }
308
def cancel_long_audio_operation(operation_name: str) -> bool:
    """Request cancellation of a running long audio synthesis operation.

    Returns True when cancellation is in progress or succeeded, False when
    the operation completed before cancellation or the request failed.

    Note: ``get_operation`` returns the raw longrunning Operation proto.
    ``done`` is a bool field (not a method) and there is no ``cancelled()``
    method -- a cancelled LRO completes with status code 1 (CANCELLED).
    """
    client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

    try:
        # Ask the service to cancel the operation.
        client.cancel_operation(request={"name": operation_name})
        print(f"Cancellation requested for operation: {operation_name}")

        # Check whether cancellation has taken effect yet.
        op = client.get_operation(request={"name": operation_name})

        if not op.done:  # proto bool field
            print("Cancellation in progress...")
            return True

        # Finished: a cancelled operation carries error code CANCELLED (1).
        if op.HasField("error") and op.error.code == 1:
            print("Operation successfully cancelled")
            return True

        print("Operation completed before cancellation")
        return False

    except Exception as e:
        print(f"Failed to cancel operation: {e}")
        return False
335
336
# Usage
337
# cancel_long_audio_operation("projects/your-project/locations/us-central1/operations/op-id")
338
```
339
340
### Listing Operations
341
342
```api { .api }
343
def list_long_audio_operations(project_id: str, location: str = "us-central1"):
    """List all long audio synthesis operations for a project location.

    Returns the iterable of raw longrunning Operation protos from
    ``list_operations``, or an empty list on failure.

    Note: each ``op`` is a proto, so ``done`` is a bool field (not a
    method) and ``error`` must be checked with ``HasField``.
    """
    client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

    parent = f"projects/{project_id}/locations/{location}"

    try:
        operations = client.list_operations(request={"name": parent})

        print(f"Operations in {parent}:")

        for op in operations:
            print(f"\nOperation: {op.name}")
            print(f"Done: {op.done}")  # proto bool field, not a method

            if op.done:
                if op.HasField("error"):  # message fields are always truthy
                    print(f"Error: {op.error}")
                else:
                    print("Status: Completed successfully")
            else:
                print("Status: Running")

                # Try to read progress from the packed metadata Any.
                if op.metadata:
                    try:
                        metadata = texttospeech_v1.SynthesizeLongAudioMetadata()
                        op.metadata.Unpack(metadata)
                        progress = getattr(metadata, 'progress_percentage', 0)
                        print(f"Progress: {progress}%")
                    except Exception:  # never use a bare except
                        print("Progress: Unknown")

        return operations

    except Exception as e:
        print(f"Failed to list operations: {e}")
        return []
383
384
# Usage
385
# operations = list_long_audio_operations("your-project-id")
386
```
387
388
## Practical Examples
389
390
### Audiobook Generation
391
392
```api { .api }
393
import os
394
from google.cloud import storage
395
from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize
396
397
class AudiobookGenerator:
    """Generate audiobooks from long text content.

    Wraps the long-audio synthesis client and a Cloud Storage client so an
    audiobook can be requested, monitored, and downloaded in one place.
    """

    def __init__(self, project_id: str, bucket_name: str, location: str = "us-central1"):
        self.project_id = project_id
        self.bucket_name = bucket_name
        self.location = location
        self.parent = f"projects/{project_id}/locations/{location}"

        # Initialize clients
        self.tts_client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()
        self.storage_client = storage.Client()

    def generate_audiobook(self, text_content: str, output_filename: str,
                           voice_name: str = "en-US-Wavenet-A",
                           language_code: str = "en-US"):
        """Start audiobook synthesis.

        Returns a dict with the operation, its name and the output GCS URI,
        or None when the bucket could not be prepared.
        """
        # Ensure the GCS bucket exists before pointing the API at it.
        try:
            bucket = self.storage_client.bucket(self.bucket_name)
            if not bucket.exists():
                bucket = self.storage_client.create_bucket(self.bucket_name)
                print(f"Created bucket: {self.bucket_name}")
        except Exception as e:
            print(f"Bucket setup error: {e}")
            return None

        gcs_uri = f"gs://{self.bucket_name}/{output_filename}"

        request = texttospeech_v1.SynthesizeLongAudioRequest(
            parent=self.parent,
            input=texttospeech_v1.SynthesisInput(text=text_content),
            audio_config=texttospeech_v1.AudioConfig(
                audio_encoding=texttospeech_v1.AudioEncoding.MP3,
                sample_rate_hertz=22050,
                speaking_rate=0.9,  # slightly slower for audiobooks
                volume_gain_db=2.0  # boost volume
            ),
            voice=texttospeech_v1.VoiceSelectionParams(
                language_code=language_code,
                name=voice_name
            ),
            output_gcs_uri=gcs_uri
        )

        print("Starting audiobook generation...")
        print(f"Output will be saved to: {gcs_uri}")

        operation = self.tts_client.synthesize_long_audio(request=request)

        return {
            'operation': operation,
            'operation_name': operation.name,
            'output_uri': gcs_uri
        }

    def wait_for_audiobook(self, operation, check_interval: int = 60):
        """Block until generation completes; True on success, False on error.

        BUGFIX: ``operation`` is a ``google.api_core.operation.Operation``
        future whose ``done()`` re-polls the server on each call.  The old
        code replaced it mid-loop with the raw proto from ``get_operation``,
        after which calling ``done()`` (a proto *field*) crashed.
        """
        import time  # local import: the snippet header does not import time

        print("Waiting for audiobook generation to complete...")

        while not operation.done():  # done() refreshes state each call
            metadata = operation.metadata  # already deserialized by api_core
            if metadata is not None:
                progress = getattr(metadata, 'progress_percentage', 0)
                print(f"Progress: {progress}%")
            time.sleep(check_interval)

        error = operation.exception()
        if error is not None:
            print(f"Audiobook generation failed: {error}")
            return False

        print("Audiobook generation completed successfully!")
        return True

    def download_audiobook(self, gcs_uri: str, local_filename: str):
        """Download a generated audiobook from GCS to a local file.

        Raises ValueError for URIs that are not of the form
        gs://bucket/object.
        """
        if not gcs_uri.startswith("gs://"):
            raise ValueError("Invalid GCS URI")

        path_parts = gcs_uri[5:].split("/", 1)
        # Guard against a bucket-only URI such as "gs://bucket" (the old
        # version raised an opaque IndexError here).
        if len(path_parts) != 2 or not path_parts[1]:
            raise ValueError("GCS URI must include an object path")
        bucket_name, blob_name = path_parts

        # Download file
        bucket = self.storage_client.bucket(bucket_name)
        blob = bucket.blob(blob_name)
        blob.download_to_filename(local_filename)
        print(f"Audiobook downloaded to: {local_filename}")

        # Report downloaded file size.
        file_size = os.path.getsize(local_filename)
        print(f"File size: {file_size / (1024*1024):.2f} MB")

        return local_filename
509
510
# Usage example
511
def generate_sample_audiobook():
    """Generate a sample audiobook."""

    # Sample long text (could be loaded from file)
    sample_text = """
    Chapter 1: Introduction

    Welcome to this sample audiobook demonstration. This text will be converted
    into high-quality speech using Google Cloud Text-to-Speech long audio synthesis.

    The long audio synthesis feature is specifically designed for content like this,
    where the text is too long for standard synthesis operations. It processes the
    content asynchronously and delivers the results to Google Cloud Storage.

    Chapter 2: Features

    Long audio synthesis supports all the same features as standard synthesis,
    including SSML markup, custom voices, and audio configuration options.
    The main difference is that it can handle much larger amounts of text
    and processes them as long-running operations.

    This makes it ideal for generating audiobooks, processing long documents,
    or creating extended audio content for podcasts and presentations.

    Chapter 3: Conclusion

    Thank you for listening to this sample audiobook. The long audio synthesis
    feature provides a powerful way to convert large amounts of text into
    natural-sounding speech.
    """ * 5  # Repeat to make it longer

    generator = AudiobookGenerator(
        project_id="your-project-id",
        bucket_name="your-audiobook-bucket"
    )

    # Kick off synthesis; result carries the operation and output URI.
    result = generator.generate_audiobook(
        text_content=sample_text,
        output_filename="sample_audiobook.mp3",
        voice_name="en-US-Wavenet-A"
    )

    if not result:
        return None

    # Wait for the operation, then fetch the finished file locally.
    if generator.wait_for_audiobook(result['operation']):
        generator.download_audiobook(
            result['output_uri'],
            "local_audiobook.mp3"
        )
        print("Audiobook generation complete!")

    return result
569
570
# Run the example
571
# audiobook_result = generate_sample_audiobook()
572
```
573
574
### Batch Document Processing
575
576
```api { .api }
577
import concurrent.futures
578
from typing import List, Dict
579
580
class BatchDocumentProcessor:
    """Process multiple documents for long audio synthesis.

    Starts one long-running synthesis operation per document and can poll
    the whole batch until every operation settles.
    """

    def __init__(self, project_id: str, bucket_name: str, location: str = "us-central1"):
        self.project_id = project_id
        self.bucket_name = bucket_name
        self.location = location
        self.parent = f"projects/{project_id}/locations/{location}"

        self.client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

    def process_document_batch(self, documents: List[Dict], max_workers: int = 5):
        """Start synthesis for each document using a thread pool.

        Each document dict needs 'name' and 'content', plus optional
        'voice' and 'audio' config dicts.  Returns one result dict per
        document with a success flag, operation name and output URI.
        """

        def process_single_document(doc_info):
            """Build and send the request for a single document."""
            try:
                doc_name = doc_info['name']
                text_content = doc_info['content']
                voice_config = doc_info.get('voice', {})
                audio_config = doc_info.get('audio', {})

                # Fall back to sensible defaults when not specified.
                voice_name = voice_config.get('name', 'en-US-Wavenet-A')
                language_code = voice_config.get('language_code', 'en-US')
                audio_encoding = audio_config.get('encoding', texttospeech_v1.AudioEncoding.MP3)
                sample_rate = audio_config.get('sample_rate', 22050)

                # NOTE(review): the output object always gets an .mp3 suffix
                # even when another encoding is requested -- confirm intended.
                output_uri = f"gs://{self.bucket_name}/batch/{doc_name}.mp3"

                request = texttospeech_v1.SynthesizeLongAudioRequest(
                    parent=self.parent,
                    input=texttospeech_v1.SynthesisInput(text=text_content),
                    audio_config=texttospeech_v1.AudioConfig(
                        audio_encoding=audio_encoding,
                        sample_rate_hertz=sample_rate
                    ),
                    voice=texttospeech_v1.VoiceSelectionParams(
                        language_code=language_code,
                        name=voice_name
                    ),
                    output_gcs_uri=output_uri
                )

                operation = self.client.synthesize_long_audio(request=request)

                return {
                    'document': doc_name,
                    'operation_name': operation.name,
                    'output_uri': output_uri,
                    'success': True,
                    'operation': operation
                }

            except Exception as e:
                return {
                    'document': doc_info['name'],
                    'operation_name': None,
                    'output_uri': None,
                    'success': False,
                    'error': str(e)
                }

        # Fan out over a thread pool; map preserves document order.
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            results = list(executor.map(process_single_document, documents))

        return results

    def monitor_batch_operations(self, operation_results: List[Dict],
                                 check_interval: int = 30):
        """Poll all started operations until each completes or fails.

        BUGFIX: ``get_operation`` returns the raw longrunning Operation
        proto, so ``done`` is a bool field (the old ``operation.done()``
        call crashed) and ``error`` is a Status message that must be
        tested with ``HasField`` -- a message field is always truthy.
        """
        import time  # local import: the snippet header does not import time

        pending_operations = [r for r in operation_results if r['success']]
        completed_operations = []

        print(f"Monitoring {len(pending_operations)} operations...")

        while pending_operations:
            still_pending = []

            for op_result in pending_operations:
                try:
                    operation = self.client.get_operation(
                        request={"name": op_result['operation_name']}
                    )

                    if operation.done:  # proto bool field
                        if operation.HasField("error"):
                            op_result['final_status'] = 'failed'
                            op_result['error'] = str(operation.error)
                            print(f"❌ {op_result['document']}: Failed")
                        else:
                            op_result['final_status'] = 'completed'
                            print(f"✅ {op_result['document']}: Completed")

                        completed_operations.append(op_result)
                    else:
                        # Still running: try to report progress.
                        if operation.metadata:
                            try:
                                metadata = texttospeech_v1.SynthesizeLongAudioMetadata()
                                operation.metadata.Unpack(metadata)
                                progress = getattr(metadata, 'progress_percentage', 0)
                                print(f"⏳ {op_result['document']}: {progress}%")
                            except Exception:  # never use a bare except
                                print(f"⏳ {op_result['document']}: In progress...")

                        still_pending.append(op_result)

                except Exception as e:
                    print(f"Error checking {op_result['document']}: {e}")
                    still_pending.append(op_result)

            pending_operations = still_pending

            if pending_operations:
                print(f"\n{len(pending_operations)} operations still running. "
                      f"Checking again in {check_interval} seconds...\n")
                time.sleep(check_interval)

        print("\nBatch processing complete!")
        print(f"Completed: {len([op for op in completed_operations if op.get('final_status') == 'completed'])}")
        print(f"Failed: {len([op for op in completed_operations if op.get('final_status') == 'failed'])}")

        return completed_operations
710
711
# Usage example
712
def batch_process_example():
    """Example of batch processing multiple documents."""

    # One spec per document: (name, ordinal word, voice, encoding, sample rate)
    specs = [
        ('document1', 'first', 'en-US-Neural2-A',
         texttospeech_v1.AudioEncoding.MP3, 22050),
        ('document2', 'second', 'en-US-Wavenet-D',
         texttospeech_v1.AudioEncoding.LINEAR16, 24000),
        ('document3', 'third', 'en-US-Standard-B',
         texttospeech_v1.AudioEncoding.OGG_OPUS, 48000),
    ]

    # Expand the specs into the document dicts the processor expects.
    documents = [
        {
            'name': doc_name,
            'content': f'This is the {ordinal} document content. ' * 100,
            'voice': {'name': voice, 'language_code': 'en-US'},
            'audio': {'encoding': encoding, 'sample_rate': rate},
        }
        for doc_name, ordinal, voice, encoding, rate in specs
    ]

    processor = BatchDocumentProcessor(
        project_id="your-project-id",
        bucket_name="your-batch-bucket"
    )

    # Start the batch, then poll every operation until it settles.
    results = processor.process_document_batch(documents, max_workers=3)
    final_results = processor.monitor_batch_operations(results)

    return final_results
750
751
# Run batch processing
752
# batch_results = batch_process_example()
753
```
754
755
## Error Handling and Best Practices
756
757
### Comprehensive Error Handling
758
759
```api { .api }
760
from google.api_core import exceptions
761
import logging
762
763
def robust_long_audio_synthesis(text_content: str, output_gcs_uri: str,
                                project_id: str, location: str = "us-central1"):
    """Start long audio synthesis, translating failures into result dicts.

    Returns ``{'success': True, 'operation': ..., 'operation_name': ...}``
    on success, otherwise ``{'success': False, 'error': ..., 'details': ...}``
    describing the failure category.
    """
    synth_client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()
    parent_path = f"projects/{project_id}/locations/{location}"

    try:
        # Basic input validation before touching the API.
        if not text_content or not text_content.strip():
            raise ValueError("Text content cannot be empty")
        if not output_gcs_uri.startswith("gs://"):
            raise ValueError("Output URI must be a valid GCS URI (gs://...)")

        synthesis_request = texttospeech_v1.SynthesizeLongAudioRequest(
            parent=parent_path,
            input=texttospeech_v1.SynthesisInput(text=text_content),
            audio_config=texttospeech_v1.AudioConfig(
                audio_encoding=texttospeech_v1.AudioEncoding.MP3,
                sample_rate_hertz=22050
            ),
            voice=texttospeech_v1.VoiceSelectionParams(
                language_code="en-US",
                name="en-US-Neural2-A"
            ),
            output_gcs_uri=output_gcs_uri
        )

        # Kick off the long-running operation.
        lro = synth_client.synthesize_long_audio(request=synthesis_request)
        return {
            'success': True,
            'operation': lro,
            'operation_name': lro.name
        }

    except exceptions.InvalidArgument as e:
        logging.error(f"Invalid request parameters: {e}")
        return {'success': False, 'error': 'Invalid parameters', 'details': str(e)}
    except exceptions.PermissionDenied as e:
        logging.error(f"Permission denied: {e}")
        return {'success': False, 'error': 'Permission denied', 'details': str(e)}
    except exceptions.ResourceExhausted as e:
        logging.error(f"Quota exceeded: {e}")
        return {'success': False, 'error': 'Quota exceeded', 'details': str(e)}
    except exceptions.FailedPrecondition as e:
        logging.error(f"Failed precondition: {e}")
        return {'success': False, 'error': 'Precondition failed', 'details': str(e)}
    except exceptions.NotFound as e:
        logging.error(f"Resource not found: {e}")
        return {'success': False, 'error': 'Resource not found', 'details': str(e)}
    except Exception as e:
        logging.error(f"Unexpected error: {e}")
        return {'success': False, 'error': 'Unexpected error', 'details': str(e)}
825
826
# Usage with error handling
827
result = robust_long_audio_synthesis(
828
text_content="Long text content...",
829
output_gcs_uri="gs://your-bucket/output.mp3",
830
project_id="your-project-id"
831
)
832
833
if result['success']:
834
print(f"Operation started: {result['operation_name']}")
835
else:
836
print(f"Error: {result['error']} - {result['details']}")
837
```
838
839
### Best Practices for Long Audio Synthesis
840
841
```api { .api }
842
class LongAudioBestPractices:
    """Best practices for long audio synthesis."""

    @staticmethod
    def validate_text_length(text: str) -> bool:
        """Return True when *text* is within the recommended size limit."""
        # Recommended maximum: ~1 million characters per request
        MAX_CHARS = 1_000_000

        if len(text) > MAX_CHARS:
            print(f"Warning: Text length ({len(text)}) exceeds recommended maximum ({MAX_CHARS})")
            return False

        return True

    @staticmethod
    def optimize_text_for_synthesis(text: str) -> str:
        """Normalize whitespace while preserving paragraph breaks.

        BUGFIX: the previous version collapsed ALL whitespace (including
        newlines) first, which destroyed every paragraph break and made
        the later paragraph-break regex dead code.  We now split into
        paragraphs first, normalize each one, then rejoin with blank lines.
        """
        import re

        paragraphs = []
        for raw_paragraph in re.split(r'\n\s*\n', text):
            if not raw_paragraph.strip():
                continue
            # Collapse whitespace runs within the paragraph only.
            paragraph = re.sub(r'\s+', ' ', raw_paragraph).strip()
            # Ensure a space after sentence-ending punctuation before a capital.
            paragraph = re.sub(r'([.!?])\s*([A-Z])', r'\1 \2', paragraph)
            paragraphs.append(paragraph)

        return '\n\n'.join(paragraphs)

    @staticmethod
    def choose_optimal_voice(content_type: str, language: str = "en-US") -> str:
        """Pick a voice name suited to the given content type."""
        voice_recommendations = {
            "audiobook": f"{language}-Wavenet-A",       # Clear, pleasant for long listening
            "news": f"{language}-Neural2-C",            # Authoritative
            "educational": f"{language}-Neural2-A",     # Clear, engaging
            "documentation": f"{language}-Standard-A",  # Clear, efficient
            "narrative": f"{language}-Wavenet-D"        # Expressive
        }

        return voice_recommendations.get(content_type, f"{language}-Neural2-A")

    @staticmethod
    def create_optimal_audio_config(use_case: str) -> "texttospeech_v1.AudioConfig":
        """Build an AudioConfig tuned for the given use case.

        The return annotation is quoted so the class can be defined even
        in modules where ``texttospeech_v1`` is not imported (the previous
        unquoted annotation raised NameError at class-definition time).
        """
        configs = {
            "audiobook": texttospeech_v1.AudioConfig(
                audio_encoding=texttospeech_v1.AudioEncoding.MP3,
                sample_rate_hertz=22050,
                speaking_rate=0.9,
                volume_gain_db=2.0
            ),
            "podcast": texttospeech_v1.AudioConfig(
                audio_encoding=texttospeech_v1.AudioEncoding.MP3,
                sample_rate_hertz=44100,
                speaking_rate=1.0,
                volume_gain_db=1.0,
                effects_profile_id=["large-home-entertainment-class-device"]
            ),
            "telephony": texttospeech_v1.AudioConfig(
                audio_encoding=texttospeech_v1.AudioEncoding.MULAW,
                sample_rate_hertz=8000,
                speaking_rate=1.1,
                effects_profile_id=["telephony-class-application"]
            ),
            "archive": texttospeech_v1.AudioConfig(
                audio_encoding=texttospeech_v1.AudioEncoding.LINEAR16,
                sample_rate_hertz=48000,
                speaking_rate=1.0
            )
        }

        return configs.get(use_case, configs["audiobook"])
919
920
# Apply best practices
921
def create_optimized_long_audio_request(text_content: str, output_uri: str,
                                        content_type: str = "audiobook"):
    """Create optimized long audio request following best practices."""

    # Warn when the text exceeds the recommended size.
    if not LongAudioBestPractices.validate_text_length(text_content):
        print("Consider breaking content into smaller chunks")

    optimized_text = LongAudioBestPractices.optimize_text_for_synthesis(text_content)

    # Pick a voice and audio configuration suited to the content type.
    selected_voice = LongAudioBestPractices.choose_optimal_voice(content_type)
    tuned_audio_config = LongAudioBestPractices.create_optimal_audio_config(content_type)

    return texttospeech_v1.SynthesizeLongAudioRequest(
        parent="projects/your-project-id/locations/us-central1",
        input=texttospeech_v1.SynthesisInput(text=optimized_text),
        audio_config=tuned_audio_config,
        voice=texttospeech_v1.VoiceSelectionParams(
            language_code="en-US",
            name=selected_voice
        ),
        output_gcs_uri=output_uri
    )
948
```