0
# Speech Adaptation
1
2
Custom speech model adaptation using phrase sets and custom word classes to improve recognition accuracy for domain-specific vocabulary, names, technical terms, and specialized language patterns.
3
4
## Capabilities
5
6
### AdaptationClient
7
8
Client for managing speech adaptation resources including phrase sets and custom classes.
9
10
```python { .api }
11
class AdaptationClient:
12
"""Client for the Speech Adaptation service."""
13
14
def __init__(
15
self,
16
*,
17
credentials: Optional[ga_credentials.Credentials] = None,
18
transport: Optional[str] = None,
19
client_options: Optional[client_options_lib.ClientOptions] = None,
20
client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO,
21
): ...
22
```
23
24
### Phrase Set Management
25
26
Create and manage custom phrase sets to improve recognition of specific words and phrases.
27
28
```python { .api }
29
def create_phrase_set(
30
self,
31
request: CreatePhraseSetRequest,
32
*,
33
retry: OptionalRetry = None,
34
timeout: Optional[float] = None,
35
metadata: Sequence[Tuple[str, str]] = ()
36
) -> PhraseSet:
37
"""
38
Create a custom phrase set.
39
40
Parameters:
41
- request: The request object containing phrase set configuration
42
- retry: Retry configuration for failed requests
43
- timeout: Request timeout in seconds
44
- metadata: Additional metadata to send with the request
45
46
Returns:
47
PhraseSet: The created phrase set
48
49
Raises:
50
google.api_core.exceptions.InvalidArgument: If the request is malformed
51
"""
52
53
def get_phrase_set(
54
self,
55
request: GetPhraseSetRequest,
56
*,
57
retry: OptionalRetry = None,
58
timeout: Optional[float] = None,
59
metadata: Sequence[Tuple[str, str]] = ()
60
) -> PhraseSet:
61
"""Retrieve a phrase set by name."""
62
63
def list_phrase_set(
64
self,
65
request: ListPhraseSetRequest,
66
*,
67
retry: OptionalRetry = None,
68
timeout: Optional[float] = None,
69
metadata: Sequence[Tuple[str, str]] = ()
70
) -> ListPhraseSetResponse:
71
"""List phrase sets in a project."""
72
73
def update_phrase_set(
74
self,
75
request: UpdatePhraseSetRequest,
76
*,
77
retry: OptionalRetry = None,
78
timeout: Optional[float] = None,
79
metadata: Sequence[Tuple[str, str]] = ()
80
) -> PhraseSet:
81
"""Update an existing phrase set."""
82
83
def delete_phrase_set(
84
self,
85
request: DeletePhraseSetRequest,
86
*,
87
retry: OptionalRetry = None,
88
timeout: Optional[float] = None,
89
metadata: Sequence[Tuple[str, str]] = ()
90
) -> None:
91
"""Delete a phrase set."""
92
```
93
94
### Custom Class Management
95
96
Create and manage custom word classes for handling specialized terminology.
97
98
```python { .api }
99
def create_custom_class(
100
self,
101
request: CreateCustomClassRequest,
102
*,
103
retry: OptionalRetry = None,
104
timeout: Optional[float] = None,
105
metadata: Sequence[Tuple[str, str]] = ()
106
) -> CustomClass:
107
"""
108
Create a custom class for specialized vocabulary.
109
110
Parameters:
111
- request: The request object containing custom class configuration
112
- retry: Retry configuration for failed requests
113
- timeout: Request timeout in seconds
114
- metadata: Additional metadata to send with the request
115
116
Returns:
117
CustomClass: The created custom class
118
"""
119
120
def get_custom_class(
121
self,
122
request: GetCustomClassRequest,
123
*,
124
retry: OptionalRetry = None,
125
timeout: Optional[float] = None,
126
metadata: Sequence[Tuple[str, str]] = ()
127
) -> CustomClass:
128
"""Retrieve a custom class by name."""
129
130
def list_custom_classes(
131
self,
132
request: ListCustomClassesRequest,
133
*,
134
retry: OptionalRetry = None,
135
timeout: Optional[float] = None,
136
metadata: Sequence[Tuple[str, str]] = ()
137
) -> ListCustomClassesResponse:
138
"""List custom classes in a project."""
139
140
def update_custom_class(
141
self,
142
request: UpdateCustomClassRequest,
143
*,
144
retry: OptionalRetry = None,
145
timeout: Optional[float] = None,
146
metadata: Sequence[Tuple[str, str]] = ()
147
) -> CustomClass:
148
"""Update an existing custom class."""
149
150
def delete_custom_class(
151
self,
152
request: DeleteCustomClassRequest,
153
*,
154
retry: OptionalRetry = None,
155
timeout: Optional[float] = None,
156
metadata: Sequence[Tuple[str, str]] = ()
157
) -> None:
158
"""Delete a custom class."""
159
```
160
161
## Usage Examples
162
163
### Creating and Using Phrase Sets
164
165
```python
166
from google.cloud import speech
167
168
# Initialize clients
169
speech_client = speech.SpeechClient()
170
adaptation_client = speech.AdaptationClient()
171
172
# Create a phrase set for technical terms
173
phrase_set_request = speech.CreatePhraseSetRequest(
174
parent="projects/your-project-id/locations/global",
175
phrase_set_id="technical-terms",
176
phrase_set=speech.PhraseSet(
177
phrases=[
178
speech.PhraseSet.Phrase(value="Kubernetes", boost=10.0),
179
speech.PhraseSet.Phrase(value="Docker container", boost=10.0),
180
speech.PhraseSet.Phrase(value="microservices", boost=5.0),
181
speech.PhraseSet.Phrase(value="API endpoint", boost=5.0),
182
]
183
)
184
)
185
186
phrase_set = adaptation_client.create_phrase_set(request=phrase_set_request)
187
print(f"Created phrase set: {phrase_set.name}")
188
189
# Use the phrase set in recognition
190
config = speech.RecognitionConfig(
191
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
192
sample_rate_hertz=16000,
193
language_code="en-US",
194
# Stored phrase sets are attached through RecognitionConfig.adaptation and
# referenced by resource name (SpeechContext only accepts phrases/boost)
adaptation=speech.SpeechAdaptation(
phrase_set_references=[phrase_set.name]
),
201
)
202
203
# Perform recognition with custom phrases
204
audio = speech.RecognitionAudio(content=audio_content)
205
response = speech_client.recognize(config=config, audio=audio)
206
```
207
208
### Creating and Using Custom Classes
209
210
```python
211
from google.cloud import speech
212
213
adaptation_client = speech.AdaptationClient()
214
215
# Create a custom class for product names
216
custom_class_request = speech.CreateCustomClassRequest(
217
parent="projects/your-project-id/locations/global",
218
custom_class_id="product-names",
219
custom_class=speech.CustomClass(
220
items=[
221
speech.CustomClass.ClassItem(value="TechWidget Pro"),
222
speech.CustomClass.ClassItem(value="DataSync Enterprise"),
223
speech.CustomClass.ClassItem(value="CloudFlow Analytics"),
224
]
225
)
226
)
227
228
custom_class = adaptation_client.create_custom_class(request=custom_class_request)
229
print(f"Created custom class: {custom_class.name}")
230
231
# Use custom class in phrase sets
232
phrase_set_with_class = speech.PhraseSet(
233
phrases=[
234
speech.PhraseSet.Phrase(
235
value=f"I need to configure ${{{custom_class.name}}}",
236
boost=15.0
237
),
238
speech.PhraseSet.Phrase(
239
value=f"The latest version of ${{{custom_class.name}}}",
240
boost=10.0
241
),
242
]
243
)
244
245
# Create phrase set that references the custom class
246
phrase_set_request = speech.CreatePhraseSetRequest(
247
parent="projects/your-project-id/locations/global",
248
phrase_set_id="product-phrases",
249
phrase_set=phrase_set_with_class
250
)
251
252
phrase_set = adaptation_client.create_phrase_set(request=phrase_set_request)
253
```
254
255
### Managing Adaptation Resources
256
257
```python
258
from google.cloud import speech
259
260
adaptation_client = speech.AdaptationClient()
261
project_path = "projects/your-project-id/locations/global"
262
263
# List all phrase sets
264
list_request = speech.ListPhraseSetRequest(parent=project_path)
265
phrase_sets = adaptation_client.list_phrase_set(request=list_request)
266
267
print("Existing phrase sets:")
268
for phrase_set in phrase_sets.phrase_sets:
269
print(f"- {phrase_set.name}: {len(phrase_set.phrases)} phrases")
270
271
# List all custom classes
272
class_request = speech.ListCustomClassesRequest(parent=project_path)
273
custom_classes = adaptation_client.list_custom_classes(request=class_request)
274
275
print("Existing custom classes:")
276
for custom_class in custom_classes.custom_classes:
277
print(f"- {custom_class.name}: {len(custom_class.items)} items")
278
279
# Update a phrase set
280
existing_phrase_set = phrase_sets.phrase_sets[0]
281
existing_phrase_set.phrases.append(
282
speech.PhraseSet.Phrase(value="new technical term", boost=8.0)
283
)
284
285
update_request = speech.UpdatePhraseSetRequest(phrase_set=existing_phrase_set)
286
updated_phrase_set = adaptation_client.update_phrase_set(request=update_request)
287
```
288
289
## Resource Types
290
291
### PhraseSet
292
293
```python { .api }
294
class PhraseSet:
295
"""A set of words or phrases to improve recognition accuracy."""
296
name: str
297
phrases: Sequence[Phrase]
298
boost: float
299
display_name: str
300
uid: str
301
create_time: Timestamp
302
update_time: Timestamp
303
delete_time: Timestamp
304
expire_time: Timestamp
305
annotations: Mapping[str, str]
306
etag: str
307
reconciling: bool
308
state: State
309
kms_key_name: str
310
kms_key_version_name: str
311
312
class Phrase:
313
"""Individual phrase with optional boost."""
314
value: str
315
boost: float
316
317
class State:
318
"""Lifecycle state of the phrase set."""
319
STATE_UNSPECIFIED = 0
320
ACTIVE = 2
321
DELETE_REQUESTED = 3
322
```
323
324
### CustomClass
325
326
```python { .api }
327
class CustomClass:
328
"""A custom class for domain-specific vocabulary."""
329
name: str
330
custom_class_id: str
331
items: Sequence[ClassItem]
332
display_name: str
333
uid: str
334
create_time: Timestamp
335
update_time: Timestamp
336
delete_time: Timestamp
337
expire_time: Timestamp
338
annotations: Mapping[str, str]
339
etag: str
340
reconciling: bool
341
state: State
342
kms_key_name: str
343
kms_key_version_name: str
344
345
class ClassItem:
346
"""Individual item in a custom class."""
347
value: str
348
349
class State:
350
"""Lifecycle state of the custom class."""
351
STATE_UNSPECIFIED = 0
352
ACTIVE = 2
353
DELETE_REQUESTED = 3
354
```
355
356
### SpeechAdaptation
357
358
```python { .api }
359
class SpeechAdaptation:
360
"""Configuration for speech adaptation."""
361
phrase_sets: Sequence[PhraseSet] # Inline phrase sets defined directly in the request
362
phrase_set_references: Sequence[str] # Resource names of stored phrase sets (projects/*/locations/*/phraseSets/*)
363
custom_classes: Sequence[CustomClass] # Inline custom classes
364
abnf_grammar: ABNFGrammar # ABNF grammar specification
365
```
366
367
## Request/Response Types
368
369
### Phrase Set Requests
370
371
```python { .api }
372
class CreatePhraseSetRequest:
373
"""Request to create a phrase set."""
374
parent: str
375
phrase_set_id: str
376
phrase_set: PhraseSet
377
378
class GetPhraseSetRequest:
379
"""Request to retrieve a phrase set."""
380
name: str
381
382
class ListPhraseSetRequest:
383
"""Request to list phrase sets."""
384
parent: str
385
page_size: int
386
page_token: str
387
388
class ListPhraseSetResponse:
389
"""Response containing phrase sets."""
390
phrase_sets: Sequence[PhraseSet]
391
next_page_token: str
392
393
class UpdatePhraseSetRequest:
394
"""Request to update a phrase set."""
395
phrase_set: PhraseSet
396
update_mask: FieldMask
397
398
class DeletePhraseSetRequest:
399
"""Request to delete a phrase set."""
400
name: str
401
```
402
403
### Custom Class Requests
404
405
```python { .api }
406
class CreateCustomClassRequest:
407
"""Request to create a custom class."""
408
parent: str
409
custom_class_id: str
410
custom_class: CustomClass
411
412
class GetCustomClassRequest:
413
"""Request to retrieve a custom class."""
414
name: str
415
416
class ListCustomClassesRequest:
417
"""Request to list custom classes."""
418
parent: str
419
page_size: int
420
page_token: str
421
422
class ListCustomClassesResponse:
423
"""Response containing custom classes."""
424
custom_classes: Sequence[CustomClass]
425
next_page_token: str
426
427
class UpdateCustomClassRequest:
428
"""Request to update a custom class."""
429
custom_class: CustomClass
430
update_mask: FieldMask
431
432
class DeleteCustomClassRequest:
433
"""Request to delete a custom class."""
434
name: str
435
```
436
437
## Best Practices
438
439
### Phrase Set Optimization
440
441
```python
442
# Effective phrase set design
443
phrase_set = speech.PhraseSet(
444
phrases=[
445
# Use realistic boost values (1.0-20.0)
446
speech.PhraseSet.Phrase(value="critical term", boost=15.0),
447
speech.PhraseSet.Phrase(value="important phrase", boost=10.0),
448
speech.PhraseSet.Phrase(value="common word", boost=5.0),
449
450
# Include spelling variants of the same term (e.g. regional spellings)
451
speech.PhraseSet.Phrase(value="color", boost=5.0),
452
speech.PhraseSet.Phrase(value="colour", boost=5.0),
453
454
# Use context-specific phrases
455
speech.PhraseSet.Phrase(value="machine learning model", boost=10.0),
456
speech.PhraseSet.Phrase(value="neural network", boost=8.0),
457
]
458
)
459
```
460
461
### Resource Naming Conventions
462
463
```python
464
# Consistent naming for resources
465
project_id = "your-project-id"
466
location = "global" # or specific region like "us-central1"
467
468
# Phrase set naming
469
phrase_set_id = "medical-terminology" # Descriptive, lowercase, hyphenated
470
phrase_set_name = f"projects/{project_id}/locations/{location}/phraseSets/{phrase_set_id}"
471
472
# Custom class naming
473
custom_class_id = "drug-names"
474
custom_class_name = f"projects/{project_id}/locations/{location}/customClasses/{custom_class_id}"
475
```
476
477
### Adaptation Performance
478
479
```python
480
# Limit the number of adaptations for optimal performance
481
config = speech.RecognitionConfig(
482
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
483
sample_rate_hertz=16000,
484
language_code="en-US",
485
# Stored phrase sets are referenced by resource name via RecognitionConfig.adaptation
adaptation=speech.SpeechAdaptation(
phrase_set_references=[
"projects/project/locations/global/phraseSets/set1",
"projects/project/locations/global/phraseSets/set2",
# Limit to 2-3 phrase sets for best performance
]
),
496
# Alternative: use direct phrase contexts for simple cases
497
# speech_contexts=[
498
# speech.SpeechContext(
499
# phrases=["simple", "phrase", "list"]
500
# )
501
# ],
502
)