or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

async-clients.md · configuration-types.md · index.md · long-audio-synthesis.md · speech-synthesis.md · streaming-synthesis.md · voice-management.md

docs/long-audio-synthesis.md

0

# Long Audio Synthesis

1

2

## Overview

3

4

Long audio synthesis is designed for generating extended audio content that exceeds the limits of standard synthesis operations. It uses Google Cloud's long-running operations (LRO) pattern to handle large-scale text-to-speech generation asynchronously, with output delivered to Google Cloud Storage.

5

6

**Key Features:**

7

- Supports very large text inputs (up to several hours of audio)

8

- Asynchronous processing with operation monitoring

9

- Direct output to Google Cloud Storage

10

- Progress tracking and metadata

11

- Suitable for audiobooks, long documents, and batch processing

12

13

## Client Setup

14

15

### Long Audio Synthesis Clients

16

17

```api { .api }

18

from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize

19

20

# Synchronous long audio client

21

long_client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

22

23

# Asynchronous long audio client

24

async_long_client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeAsyncClient()

25

26

# Alternative import paths

27

from google.cloud import texttospeech_v1

28

29

# Through main module

30

long_client = texttospeech_v1.services.text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

31

```

32

33

### Authentication and Project Setup

34

35

```api { .api }

36

import os

37

from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize

38

39

# Set up authentication (if not using default credentials)

40

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/path/to/service-account-key.json'

41

42

# Initialize with explicit project

43

client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

44

45

# Project and location information

46

PROJECT_ID = "your-project-id"

47

LOCATION = "us-central1" # or other supported location

48

PARENT = f"projects/{PROJECT_ID}/locations/{LOCATION}"

49

```

50

51

## Core Long Audio Operations

52

53

### Basic Long Audio Synthesis

54

55

```api { .api }

56

from google.cloud import texttospeech_v1

57

from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize

58

59

# Initialize client

60

client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

61

62

# Create long audio synthesis request

63

request = texttospeech_v1.SynthesizeLongAudioRequest(

64

parent="projects/your-project-id/locations/us-central1",

65

input=texttospeech_v1.SynthesisInput(

66

text="This is a very long text that will be converted to audio. " * 100

67

),

68

audio_config=texttospeech_v1.AudioConfig(

69

audio_encoding=texttospeech_v1.AudioEncoding.LINEAR16,

70

sample_rate_hertz=22050

71

),

72

voice=texttospeech_v1.VoiceSelectionParams(

73

language_code="en-US",

74

name="en-US-Wavenet-A"

75

),

76

output_gcs_uri="gs://your-bucket-name/output-audio.wav"

77

)

78

79

# Start long-running operation

80

operation = client.synthesize_long_audio(request=request)

81

82

print(f"Operation name: {operation.name}")

83

print("Long audio synthesis started...")

84

85

# Wait for completion

86

result = operation.result() # Blocks until complete

87

88

print("Long audio synthesis completed!")

89

print(f"Result: {result}")

90

```

91

92

### SSML Long Audio Synthesis

93

94

```api { .api }

95

from google.cloud import texttospeech_v1

96

97

# Prepare long SSML content

98

long_ssml_content = """

99

<speak>

100

<p>

101

<s>Welcome to this long audio demonstration.</s>

102

<s>This content will be processed as a long-running operation.</s>

103

</p>

104

105

<break time="2s"/>

106

107

<p>

108

<s>Here we have multiple paragraphs with various SSML features.</s>

109

<s><prosody rate="slow">This part is spoken slowly.</prosody></s>

110

<s><prosody rate="fast">While this part is much faster.</prosody></s>

111

</p>

112

113

<break time="3s"/>

114

115

<p>

116

<s><emphasis level="strong">This is emphasized text.</emphasis></s>

117

<s>And this concludes our long audio sample.</s>

118

</p>

119

</speak>

120

"""

121

122

# Create request with SSML

123

request = texttospeech_v1.SynthesizeLongAudioRequest(

124

parent="projects/your-project-id/locations/us-central1",

125

input=texttospeech_v1.SynthesisInput(ssml=long_ssml_content),

126

audio_config=texttospeech_v1.AudioConfig(

127

audio_encoding=texttospeech_v1.AudioEncoding.MP3,

128

speaking_rate=1.0,

129

pitch=0.0,

130

volume_gain_db=0.0

131

),

132

voice=texttospeech_v1.VoiceSelectionParams(

133

language_code="en-US",

134

name="en-US-Neural2-A"

135

),

136

output_gcs_uri="gs://your-bucket-name/long-ssml-output.mp3"

137

)

138

139

client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

140

operation = client.synthesize_long_audio(request=request)

141

```

142

143

## Request and Response Types

144

145

### SynthesizeLongAudioRequest

146

147

```api { .api }

148

from google.cloud.texttospeech_v1 import (

149

SynthesizeLongAudioRequest,

150

SynthesisInput,

151

AudioConfig,

152

VoiceSelectionParams,

153

AudioEncoding

154

)

155

156

# Complete long audio request configuration

157

request = SynthesizeLongAudioRequest(

158

parent="projects/your-project-id/locations/us-central1", # Required: parent resource

159

160

input=SynthesisInput(

161

text="Long text content to synthesize..." # or ssml="<speak>...</speak>"

162

),

163

164

audio_config=AudioConfig(

165

audio_encoding=AudioEncoding.LINEAR16, # Audio format

166

sample_rate_hertz=24000, # Sample rate

167

speaking_rate=1.0, # Speech rate

168

pitch=0.0, # Pitch adjustment

169

volume_gain_db=0.0, # Volume gain

170

effects_profile_id=["large-home-entertainment-class-device"] # Audio effects

171

),

172

173

voice=VoiceSelectionParams(

174

language_code="en-US", # Required: language

175

name="en-US-Wavenet-D", # Specific voice

176

ssml_gender=texttospeech_v1.SsmlVoiceGender.FEMALE

177

),

178

179

output_gcs_uri="gs://your-bucket-name/path/output.wav" # Required: GCS output location

180

)

181

182

# Request with custom pronunciations

183

request_with_pronunciations = SynthesizeLongAudioRequest(

184

parent="projects/your-project-id/locations/us-central1",

185

input=SynthesisInput(text="Text with custom pronunciations for API and JSON terms."),

186

audio_config=AudioConfig(

187

audio_encoding=AudioEncoding.MP3,

188

sample_rate_hertz=22050

189

),

190

voice=VoiceSelectionParams(

191

language_code="en-US",

192

name="en-US-Neural2-A",

193

custom_pronunciations=texttospeech_v1.CustomPronunciations(

194

pronunciations=[

195

texttospeech_v1.CustomPronunciationParams(

196

phrase="API",

197

ipa="ˌeɪ piː ˈaɪ",

198

phonetic_encoding=texttospeech_v1.CustomPronunciationParams.PhoneticEncoding.IPA

199

)

200

]

201

)

202

),

203

output_gcs_uri="gs://your-bucket-name/custom-pronunciation-output.mp3"

204

)

205

```

206

207

### SynthesizeLongAudioResponse and Metadata

208

209

```api { .api }

210

from google.cloud.texttospeech_v1 import SynthesizeLongAudioResponse, SynthesizeLongAudioMetadata

211

212

# Response object (returned when operation completes)

213

# SynthesizeLongAudioResponse is typically empty - the audio is written to GCS

214

215

# Metadata object (available during operation)

216

def process_operation_metadata(operation):

217

"""Process metadata from long-running operation."""

218

219

if operation.metadata:

220

# Metadata contains progress information

221

metadata = SynthesizeLongAudioMetadata()

222

operation.metadata.Unpack(metadata)

223

224

print(f"Progress: {metadata.progress_percentage}%")

225

print(f"Start time: {metadata.start_time}")

226

227

if metadata.last_update_time:

228

print(f"Last update: {metadata.last_update_time}")

229

230

return operation.metadata

231

232

# Access operation result

233

def get_operation_result(operation):

234

"""Get result from completed operation."""

235

236

if operation.done():

237

if operation.error:

238

print(f"Operation failed: {operation.error}")

239

return None

240

else:

241

result = operation.result()

242

print("Operation completed successfully")

243

# Result is typically empty - check GCS for output file

244

return result

245

else:

246

print(f"Operation still running: {operation.name}")

247

return None

248

```

249

250

## Operation Management

251

252

### Monitoring Long-Running Operations

253

254

```api { .api }

255

import time

256

from google.api_core import operation

257

from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize

258

259

def monitor_long_audio_operation(operation_name: str, check_interval: int = 30):

260

"""Monitor a long-running audio synthesis operation."""

261

262

client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

263

264

# Get operation by name

265

op = client.get_operation(request={"name": operation_name})

266

267

print(f"Monitoring operation: {operation_name}")

268

269

while not op.done():

270

# Process metadata

271

if op.metadata:

272

try:

273

metadata = texttospeech_v1.SynthesizeLongAudioMetadata()

274

op.metadata.Unpack(metadata)

275

276

progress = getattr(metadata, 'progress_percentage', 0)

277

print(f"Progress: {progress}%")

278

279

if hasattr(metadata, 'start_time') and metadata.start_time:

280

print(f"Started at: {metadata.start_time}")

281

282

except Exception as e:

283

print(f"Could not parse metadata: {e}")

284

285

print(f"Operation still running. Checking again in {check_interval} seconds...")

286

time.sleep(check_interval)

287

288

# Refresh operation status

289

op = client.get_operation(request={"name": operation_name})

290

291

# Operation completed

292

if op.error:

293

print(f"Operation failed: {op.error}")

294

return False

295

else:

296

print("Operation completed successfully!")

297

print(f"Output should be available at the specified GCS URI")

298

return True

299

300

# Usage

301

# operation_name = "projects/your-project/locations/us-central1/operations/long-operation-id"

302

# success = monitor_long_audio_operation(operation_name)

303

```

304

305

### Cancelling Operations

306

307

```api { .api }

308

def cancel_long_audio_operation(operation_name: str):

309

"""Cancel a running long audio synthesis operation."""

310

311

client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

312

313

try:

314

# Cancel the operation

315

client.cancel_operation(request={"name": operation_name})

316

print(f"Cancellation requested for operation: {operation_name}")

317

318

# Check if cancellation was successful

319

op = client.get_operation(request={"name": operation_name})

320

321

if op.done():

322

if op.cancelled():

323

print("Operation successfully cancelled")

324

return True

325

else:

326

print("Operation completed before cancellation")

327

return False

328

else:

329

print("Cancellation in progress...")

330

return True

331

332

except Exception as e:

333

print(f"Failed to cancel operation: {e}")

334

return False

335

336

# Usage

337

# cancel_long_audio_operation("projects/your-project/locations/us-central1/operations/op-id")

338

```

339

340

### Listing Operations

341

342

```api { .api }

343

def list_long_audio_operations(project_id: str, location: str = "us-central1"):

344

"""List all long audio synthesis operations for a project."""

345

346

client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

347

348

parent = f"projects/{project_id}/locations/{location}"

349

350

try:

351

# List operations

352

operations = client.list_operations(request={"name": parent})

353

354

print(f"Operations in {parent}:")

355

356

for op in operations:

357

print(f"\nOperation: {op.name}")

358

print(f"Done: {op.done()}")

359

360

if op.done():

361

if op.error:

362

print(f"Error: {op.error}")

363

else:

364

print("Status: Completed successfully")

365

else:

366

print("Status: Running")

367

368

# Try to get metadata

369

if op.metadata:

370

try:

371

metadata = texttospeech_v1.SynthesizeLongAudioMetadata()

372

op.metadata.Unpack(metadata)

373

progress = getattr(metadata, 'progress_percentage', 0)

374

print(f"Progress: {progress}%")

375

except:

376

print("Progress: Unknown")

377

378

return operations

379

380

except Exception as e:

381

print(f"Failed to list operations: {e}")

382

return []

383

384

# Usage

385

# operations = list_long_audio_operations("your-project-id")

386

```

387

388

## Practical Examples

389

390

### Audiobook Generation

391

392

```api { .api }

393

import os

394

from google.cloud import storage

395

from google.cloud.texttospeech_v1.services import text_to_speech_long_audio_synthesize

396

397

class AudiobookGenerator:

398

"""Generate audiobooks from long text content."""

399

400

def __init__(self, project_id: str, bucket_name: str, location: str = "us-central1"):

401

self.project_id = project_id

402

self.bucket_name = bucket_name

403

self.location = location

404

self.parent = f"projects/{project_id}/locations/{location}"

405

406

# Initialize clients

407

self.tts_client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

408

self.storage_client = storage.Client()

409

410

def generate_audiobook(self, text_content: str, output_filename: str,

411

voice_name: str = "en-US-Wavenet-A",

412

language_code: str = "en-US"):

413

"""Generate audiobook from text content."""

414

415

# Ensure GCS bucket exists

416

try:

417

bucket = self.storage_client.bucket(self.bucket_name)

418

if not bucket.exists():

419

bucket = self.storage_client.create_bucket(self.bucket_name)

420

print(f"Created bucket: {self.bucket_name}")

421

except Exception as e:

422

print(f"Bucket setup error: {e}")

423

return None

424

425

# Configure audiobook synthesis

426

gcs_uri = f"gs://{self.bucket_name}/{output_filename}"

427

428

request = texttospeech_v1.SynthesizeLongAudioRequest(

429

parent=self.parent,

430

input=texttospeech_v1.SynthesisInput(text=text_content),

431

audio_config=texttospeech_v1.AudioConfig(

432

audio_encoding=texttospeech_v1.AudioEncoding.MP3,

433

sample_rate_hertz=22050,

434

speaking_rate=0.9, # Slightly slower for audiobooks

435

volume_gain_db=2.0 # Boost volume

436

),

437

voice=texttospeech_v1.VoiceSelectionParams(

438

language_code=language_code,

439

name=voice_name

440

),

441

output_gcs_uri=gcs_uri

442

)

443

444

print(f"Starting audiobook generation...")

445

print(f"Output will be saved to: {gcs_uri}")

446

447

# Start synthesis

448

operation = self.tts_client.synthesize_long_audio(request=request)

449

450

return {

451

'operation': operation,

452

'operation_name': operation.name,

453

'output_uri': gcs_uri

454

}

455

456

def wait_for_audiobook(self, operation, check_interval: int = 60):

457

"""Wait for audiobook generation to complete."""

458

459

print("Waiting for audiobook generation to complete...")

460

461

while not operation.done():

462

# Get progress

463

if operation.metadata:

464

try:

465

metadata = texttospeech_v1.SynthesizeLongAudioMetadata()

466

operation.metadata.Unpack(metadata)

467

progress = getattr(metadata, 'progress_percentage', 0)

468

print(f"Progress: {progress}%")

469

except:

470

print("Checking progress...")

471

472

time.sleep(check_interval)

473

474

# Refresh operation

475

operation = self.tts_client.get_operation(

476

request={"name": operation.name}

477

)

478

479

if operation.error:

480

print(f"Audiobook generation failed: {operation.error}")

481

return False

482

else:

483

print("Audiobook generation completed successfully!")

484

return True

485

486

def download_audiobook(self, gcs_uri: str, local_filename: str):

487

"""Download generated audiobook from GCS."""

488

489

# Parse GCS URI

490

if not gcs_uri.startswith("gs://"):

491

raise ValueError("Invalid GCS URI")

492

493

path_parts = gcs_uri[5:].split("/", 1)

494

bucket_name = path_parts[0]

495

blob_name = path_parts[1]

496

497

# Download file

498

bucket = self.storage_client.bucket(bucket_name)

499

blob = bucket.blob(blob_name)

500

501

blob.download_to_filename(local_filename)

502

print(f"Audiobook downloaded to: {local_filename}")

503

504

# Get file info

505

file_size = os.path.getsize(local_filename)

506

print(f"File size: {file_size / (1024*1024):.2f} MB")

507

508

return local_filename

509

510

# Usage example

511

def generate_sample_audiobook():

512

"""Generate a sample audiobook."""

513

514

# Sample long text (could be loaded from file)

515

sample_text = """

516

Chapter 1: Introduction

517

518

Welcome to this sample audiobook demonstration. This text will be converted

519

into high-quality speech using Google Cloud Text-to-Speech long audio synthesis.

520

521

The long audio synthesis feature is specifically designed for content like this,

522

where the text is too long for standard synthesis operations. It processes the

523

content asynchronously and delivers the results to Google Cloud Storage.

524

525

Chapter 2: Features

526

527

Long audio synthesis supports all the same features as standard synthesis,

528

including SSML markup, custom voices, and audio configuration options.

529

The main difference is that it can handle much larger amounts of text

530

and processes them as long-running operations.

531

532

This makes it ideal for generating audiobooks, processing long documents,

533

or creating extended audio content for podcasts and presentations.

534

535

Chapter 3: Conclusion

536

537

Thank you for listening to this sample audiobook. The long audio synthesis

538

feature provides a powerful way to convert large amounts of text into

539

natural-sounding speech.

540

""" * 5 # Repeat to make it longer

541

542

# Generate audiobook

543

generator = AudiobookGenerator(

544

project_id="your-project-id",

545

bucket_name="your-audiobook-bucket"

546

)

547

548

result = generator.generate_audiobook(

549

text_content=sample_text,

550

output_filename="sample_audiobook.mp3",

551

voice_name="en-US-Wavenet-A"

552

)

553

554

if result:

555

# Wait for completion

556

success = generator.wait_for_audiobook(result['operation'])

557

558

if success:

559

# Download the result

560

generator.download_audiobook(

561

result['output_uri'],

562

"local_audiobook.mp3"

563

)

564

print("Audiobook generation complete!")

565

566

return result

567

568

return None

569

570

# Run the example

571

# audiobook_result = generate_sample_audiobook()

572

```

573

574

### Batch Document Processing

575

576

```api { .api }

577

import concurrent.futures

578

from typing import List, Dict

579

580

class BatchDocumentProcessor:

581

"""Process multiple documents for long audio synthesis."""

582

583

def __init__(self, project_id: str, bucket_name: str, location: str = "us-central1"):

584

self.project_id = project_id

585

self.bucket_name = bucket_name

586

self.location = location

587

self.parent = f"projects/{project_id}/locations/{location}"

588

589

self.client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

590

591

def process_document_batch(self, documents: List[Dict], max_workers: int = 5):

592

"""Process multiple documents in parallel."""

593

594

def process_single_document(doc_info):

595

"""Process a single document."""

596

try:

597

doc_name = doc_info['name']

598

text_content = doc_info['content']

599

voice_config = doc_info.get('voice', {})

600

audio_config = doc_info.get('audio', {})

601

602

# Default configurations

603

voice_name = voice_config.get('name', 'en-US-Wavenet-A')

604

language_code = voice_config.get('language_code', 'en-US')

605

606

audio_encoding = audio_config.get('encoding', texttospeech_v1.AudioEncoding.MP3)

607

sample_rate = audio_config.get('sample_rate', 22050)

608

609

# Create request

610

output_uri = f"gs://{self.bucket_name}/batch/{doc_name}.mp3"

611

612

request = texttospeech_v1.SynthesizeLongAudioRequest(

613

parent=self.parent,

614

input=texttospeech_v1.SynthesisInput(text=text_content),

615

audio_config=texttospeech_v1.AudioConfig(

616

audio_encoding=audio_encoding,

617

sample_rate_hertz=sample_rate

618

),

619

voice=texttospeech_v1.VoiceSelectionParams(

620

language_code=language_code,

621

name=voice_name

622

),

623

output_gcs_uri=output_uri

624

)

625

626

# Start synthesis

627

operation = self.client.synthesize_long_audio(request=request)

628

629

return {

630

'document': doc_name,

631

'operation_name': operation.name,

632

'output_uri': output_uri,

633

'success': True,

634

'operation': operation

635

}

636

637

except Exception as e:

638

return {

639

'document': doc_info['name'],

640

'operation_name': None,

641

'output_uri': None,

642

'success': False,

643

'error': str(e)

644

}

645

646

# Process documents in parallel

647

with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:

648

results = list(executor.map(process_single_document, documents))

649

650

return results

651

652

def monitor_batch_operations(self, operation_results: List[Dict],

653

check_interval: int = 30):

654

"""Monitor multiple long-running operations."""

655

656

pending_operations = [r for r in operation_results if r['success']]

657

completed_operations = []

658

659

print(f"Monitoring {len(pending_operations)} operations...")

660

661

while pending_operations:

662

still_pending = []

663

664

for op_result in pending_operations:

665

try:

666

# Check operation status

667

operation = self.client.get_operation(

668

request={"name": op_result['operation_name']}

669

)

670

671

if operation.done():

672

if operation.error:

673

op_result['final_status'] = 'failed'

674

op_result['error'] = str(operation.error)

675

print(f"❌ {op_result['document']}: Failed")

676

else:

677

op_result['final_status'] = 'completed'

678

print(f"✅ {op_result['document']}: Completed")

679

680

completed_operations.append(op_result)

681

else:

682

# Still running

683

if operation.metadata:

684

try:

685

metadata = texttospeech_v1.SynthesizeLongAudioMetadata()

686

operation.metadata.Unpack(metadata)

687

progress = getattr(metadata, 'progress_percentage', 0)

688

print(f"⏳ {op_result['document']}: {progress}%")

689

except:

690

print(f"⏳ {op_result['document']}: In progress...")

691

692

still_pending.append(op_result)

693

694

except Exception as e:

695

print(f"Error checking {op_result['document']}: {e}")

696

still_pending.append(op_result)

697

698

pending_operations = still_pending

699

700

if pending_operations:

701

print(f"\n{len(pending_operations)} operations still running. "

702

f"Checking again in {check_interval} seconds...\n")

703

time.sleep(check_interval)

704

705

print(f"\nBatch processing complete!")

706

print(f"Completed: {len([op for op in completed_operations if op.get('final_status') == 'completed'])}")

707

print(f"Failed: {len([op for op in completed_operations if op.get('final_status') == 'failed'])}")

708

709

return completed_operations

710

711

# Usage example

712

def batch_process_example():

713

"""Example of batch processing multiple documents."""

714

715

# Sample documents

716

documents = [

717

{

718

'name': 'document1',

719

'content': 'This is the first document content. ' * 100,

720

'voice': {'name': 'en-US-Neural2-A', 'language_code': 'en-US'},

721

'audio': {'encoding': texttospeech_v1.AudioEncoding.MP3, 'sample_rate': 22050}

722

},

723

{

724

'name': 'document2',

725

'content': 'This is the second document content. ' * 100,

726

'voice': {'name': 'en-US-Wavenet-D', 'language_code': 'en-US'},

727

'audio': {'encoding': texttospeech_v1.AudioEncoding.LINEAR16, 'sample_rate': 24000}

728

},

729

{

730

'name': 'document3',

731

'content': 'This is the third document content. ' * 100,

732

'voice': {'name': 'en-US-Standard-B', 'language_code': 'en-US'},

733

'audio': {'encoding': texttospeech_v1.AudioEncoding.OGG_OPUS, 'sample_rate': 48000}

734

}

735

]

736

737

# Process batch

738

processor = BatchDocumentProcessor(

739

project_id="your-project-id",

740

bucket_name="your-batch-bucket"

741

)

742

743

# Start batch processing

744

results = processor.process_document_batch(documents, max_workers=3)

745

746

# Monitor operations

747

final_results = processor.monitor_batch_operations(results)

748

749

return final_results

750

751

# Run batch processing

752

# batch_results = batch_process_example()

753

```

754

755

## Error Handling and Best Practices

756

757

### Comprehensive Error Handling

758

759

```api { .api }

760

from google.api_core import exceptions

761

import logging

762

763

def robust_long_audio_synthesis(text_content: str, output_gcs_uri: str,

764

project_id: str, location: str = "us-central1"):

765

"""Long audio synthesis with comprehensive error handling."""

766

767

client = text_to_speech_long_audio_synthesize.TextToSpeechLongAudioSynthesizeClient()

768

parent = f"projects/{project_id}/locations/{location}"

769

770

try:

771

# Validate inputs

772

if not text_content or not text_content.strip():

773

raise ValueError("Text content cannot be empty")

774

775

if not output_gcs_uri.startswith("gs://"):

776

raise ValueError("Output URI must be a valid GCS URI (gs://...)")

777

778

# Create request

779

request = texttospeech_v1.SynthesizeLongAudioRequest(

780

parent=parent,

781

input=texttospeech_v1.SynthesisInput(text=text_content),

782

audio_config=texttospeech_v1.AudioConfig(

783

audio_encoding=texttospeech_v1.AudioEncoding.MP3,

784

sample_rate_hertz=22050

785

),

786

voice=texttospeech_v1.VoiceSelectionParams(

787

language_code="en-US",

788

name="en-US-Neural2-A"

789

),

790

output_gcs_uri=output_gcs_uri

791

)

792

793

# Start operation

794

operation = client.synthesize_long_audio(request=request)

795

796

return {

797

'success': True,

798

'operation': operation,

799

'operation_name': operation.name

800

}

801

802

except exceptions.InvalidArgument as e:

803

logging.error(f"Invalid request parameters: {e}")

804

return {'success': False, 'error': 'Invalid parameters', 'details': str(e)}

805

806

except exceptions.PermissionDenied as e:

807

logging.error(f"Permission denied: {e}")

808

return {'success': False, 'error': 'Permission denied', 'details': str(e)}

809

810

except exceptions.ResourceExhausted as e:

811

logging.error(f"Quota exceeded: {e}")

812

return {'success': False, 'error': 'Quota exceeded', 'details': str(e)}

813

814

except exceptions.FailedPrecondition as e:

815

logging.error(f"Failed precondition: {e}")

816

return {'success': False, 'error': 'Precondition failed', 'details': str(e)}

817

818

except exceptions.NotFound as e:

819

logging.error(f"Resource not found: {e}")

820

return {'success': False, 'error': 'Resource not found', 'details': str(e)}

821

822

except Exception as e:

823

logging.error(f"Unexpected error: {e}")

824

return {'success': False, 'error': 'Unexpected error', 'details': str(e)}

825

826

# Usage with error handling

827

result = robust_long_audio_synthesis(

828

text_content="Long text content...",

829

output_gcs_uri="gs://your-bucket/output.mp3",

830

project_id="your-project-id"

831

)

832

833

if result['success']:

834

print(f"Operation started: {result['operation_name']}")

835

else:

836

print(f"Error: {result['error']} - {result['details']}")

837

```

838

839

### Best Practices for Long Audio Synthesis

840

841

```api { .api }

842

class LongAudioBestPractices:

843

"""Best practices for long audio synthesis."""

844

845

@staticmethod

846

def validate_text_length(text: str) -> bool:

847

"""Validate text length for long audio synthesis."""

848

# Recommended maximum: ~1 million characters

849

MAX_CHARS = 1_000_000

850

851

if len(text) > MAX_CHARS:

852

print(f"Warning: Text length ({len(text)}) exceeds recommended maximum ({MAX_CHARS})")

853

return False

854

855

return True

856

857

@staticmethod

858

def optimize_text_for_synthesis(text: str) -> str:

859

"""Optimize text content for better synthesis."""

860

import re

861

862

# Remove excessive whitespace

863

text = re.sub(r'\s+', ' ', text)

864

865

# Add proper punctuation for better pacing

866

text = re.sub(r'([.!?])\s*([A-Z])', r'\1 \2', text)

867

868

# Ensure paragraph breaks

869

text = re.sub(r'\n\s*\n', '\n\n', text)

870

871

return text.strip()

872

873

@staticmethod

874

def choose_optimal_voice(content_type: str, language: str = "en-US") -> str:

875

"""Choose optimal voice based on content type."""

876

877

voice_recommendations = {

878

"audiobook": f"{language}-Wavenet-A", # Clear, pleasant for long listening

879

"news": f"{language}-Neural2-C", # Authoritative

880

"educational": f"{language}-Neural2-A", # Clear, engaging

881

"documentation": f"{language}-Standard-A", # Clear, efficient

882

"narrative": f"{language}-Wavenet-D" # Expressive

883

}

884

885

return voice_recommendations.get(content_type, f"{language}-Neural2-A")

886

887

@staticmethod

888

def create_optimal_audio_config(use_case: str) -> texttospeech_v1.AudioConfig:

889

"""Create optimal audio configuration for different use cases."""

890

891

configs = {

892

"audiobook": texttospeech_v1.AudioConfig(

893

audio_encoding=texttospeech_v1.AudioEncoding.MP3,

894

sample_rate_hertz=22050,

895

speaking_rate=0.9,

896

volume_gain_db=2.0

897

),

898

"podcast": texttospeech_v1.AudioConfig(

899

audio_encoding=texttospeech_v1.AudioEncoding.MP3,

900

sample_rate_hertz=44100,

901

speaking_rate=1.0,

902

volume_gain_db=1.0,

903

effects_profile_id=["large-home-entertainment-class-device"]

904

),

905

"telephony": texttospeech_v1.AudioConfig(

906

audio_encoding=texttospeech_v1.AudioEncoding.MULAW,

907

sample_rate_hertz=8000,

908

speaking_rate=1.1,

909

effects_profile_id=["telephony-class-application"]

910

),

911

"archive": texttospeech_v1.AudioConfig(

912

audio_encoding=texttospeech_v1.AudioEncoding.LINEAR16,

913

sample_rate_hertz=48000,

914

speaking_rate=1.0

915

)

916

}

917

918

return configs.get(use_case, configs["audiobook"])

919

920

# Apply best practices

921

def create_optimized_long_audio_request(text_content: str, output_uri: str,

922

content_type: str = "audiobook"):

923

"""Create optimized long audio request following best practices."""

924

925

# Validate and optimize text

926

if not LongAudioBestPractices.validate_text_length(text_content):

927

print("Consider breaking content into smaller chunks")

928

929

optimized_text = LongAudioBestPractices.optimize_text_for_synthesis(text_content)

930

931

# Choose optimal voice and config

932

voice_name = LongAudioBestPractices.choose_optimal_voice(content_type)

933

audio_config = LongAudioBestPractices.create_optimal_audio_config(content_type)

934

935

# Create request

936

request = texttospeech_v1.SynthesizeLongAudioRequest(

937

parent="projects/your-project-id/locations/us-central1",

938

input=texttospeech_v1.SynthesisInput(text=optimized_text),

939

audio_config=audio_config,

940

voice=texttospeech_v1.VoiceSelectionParams(

941

language_code="en-US",

942

name=voice_name

943

),

944

output_gcs_uri=output_uri

945

)

946

947

return request

948

```