CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-google-cloud-speech

Google Cloud Speech API client library for speech-to-text conversion with support for real-time streaming, batch processing, and advanced speech recognition models

Pending
Overview
Eval results
Files

docs/speech-adaptation.md

Speech Adaptation

Speech adaptation customizes recognition with phrase sets and custom word classes, improving accuracy for domain-specific vocabulary, names, technical terms, and specialized language patterns.

Capabilities

AdaptationClient

Client for managing speech adaptation resources including phrase sets and custom classes.

class AdaptationClient:
    """Client for the Speech Adaptation service.

    Manages the adaptation resources covered by this document: phrase sets
    and custom classes.

    Every constructor argument is keyword-only and optional.

    Parameters:
    - credentials: Credentials used to authorize requests; defaults to None
      (NOTE(review): presumably falls back to application-default
      credentials - confirm against the library documentation)
    - transport: Name of the transport to use; defaults to None
      (NOTE(review): accepted values are not shown here - confirm)
    - client_options: Client configuration overrides; defaults to None
    - client_info: Client metadata; defaults to DEFAULT_CLIENT_INFO
    """
    
    def __init__(
        self,
        *,
        credentials: Optional[ga_credentials.Credentials] = None,
        transport: Optional[str] = None,
        client_options: Optional[client_options_lib.ClientOptions] = None,
        client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO,
    ): ...

Phrase Set Management

Create and manage custom phrase sets to improve recognition of specific words and phrases.

def create_phrase_set(
    self,
    request: CreatePhraseSetRequest,
    *,
    retry: OptionalRetry = None,
    timeout: Optional[float] = None,
    metadata: Sequence[Tuple[str, str]] = ()
) -> PhraseSet:
    """
    Create a custom phrase set.

    Parameters:
    - request: The request object containing phrase set configuration
      (``parent``, ``phrase_set_id`` and the ``phrase_set`` to create)
    - retry: Retry configuration for failed requests
    - timeout: Request timeout in seconds
    - metadata: Additional metadata to send with the request, as
      (key, value) string pairs

    Returns:
    PhraseSet: The created phrase set

    Raises:
    google.api_core.exceptions.InvalidArgument: If the request is malformed
    """

def get_phrase_set(
    self,
    request: GetPhraseSetRequest,
    *,
    retry: OptionalRetry = None,
    timeout: Optional[float] = None,
    metadata: Sequence[Tuple[str, str]] = ()
) -> PhraseSet:
    """
    Retrieve a phrase set by name.

    Parameters:
    - request: The request object; its ``name`` field identifies the
      phrase set to fetch
    - retry: Retry configuration for failed requests
    - timeout: Request timeout in seconds
    - metadata: Additional metadata to send with the request

    Returns:
    PhraseSet: The requested phrase set
    """

def list_phrase_sets(
    self,
    request: ListPhraseSetRequest,
    *,
    retry: OptionalRetry = None,
    timeout: Optional[float] = None,
    metadata: Sequence[Tuple[str, str]] = ()
) -> ListPhraseSetResponse:
    """
    List phrase sets in a project.

    NOTE(review): the published google-cloud-speech client appears to expose
    this method as ``list_phrase_set`` (singular) and to return a pager that
    wraps ``ListPhraseSetResponse`` - confirm against the installed library
    before relying on the name and return type shown here.

    Parameters:
    - request: The request object; ``parent`` selects the location to list,
      ``page_size`` and ``page_token`` control pagination
    - retry: Retry configuration for failed requests
    - timeout: Request timeout in seconds
    - metadata: Additional metadata to send with the request

    Returns:
    ListPhraseSetResponse: The phrase sets (``phrase_sets``) and a
    ``next_page_token``
    """

def update_phrase_set(
    self,
    request: UpdatePhraseSetRequest,
    *,
    retry: OptionalRetry = None,
    timeout: Optional[float] = None,
    metadata: Sequence[Tuple[str, str]] = ()
) -> PhraseSet:
    """
    Update an existing phrase set.

    Parameters:
    - request: The request object carrying the modified ``phrase_set`` and
      an ``update_mask``
    - retry: Retry configuration for failed requests
    - timeout: Request timeout in seconds
    - metadata: Additional metadata to send with the request

    Returns:
    PhraseSet: The updated phrase set
    """

def delete_phrase_set(
    self,
    request: DeletePhraseSetRequest,
    *,
    retry: OptionalRetry = None,
    timeout: Optional[float] = None,
    metadata: Sequence[Tuple[str, str]] = ()
) -> None:
    """
    Delete a phrase set.

    Parameters:
    - request: The request object; its ``name`` field identifies the
      phrase set to delete
    - retry: Retry configuration for failed requests
    - timeout: Request timeout in seconds
    - metadata: Additional metadata to send with the request

    Returns:
    None
    """

Custom Class Management

Create and manage custom word classes for handling specialized terminology.

def create_custom_class(
    self,
    request: CreateCustomClassRequest,
    *,
    retry: OptionalRetry = None,
    timeout: Optional[float] = None,
    metadata: Sequence[Tuple[str, str]] = ()
) -> CustomClass:
    """
    Create a custom class for specialized vocabulary.

    Parameters:
    - request: The request object containing custom class configuration
      (``parent``, ``custom_class_id`` and the ``custom_class`` to create)
    - retry: Retry configuration for failed requests
    - timeout: Request timeout in seconds
    - metadata: Additional metadata to send with the request, as
      (key, value) string pairs

    Returns:
    CustomClass: The created custom class
    """

def get_custom_class(
    self,
    request: GetCustomClassRequest,
    *,
    retry: OptionalRetry = None,
    timeout: Optional[float] = None,
    metadata: Sequence[Tuple[str, str]] = ()
) -> CustomClass:
    """
    Retrieve a custom class by name.

    Parameters:
    - request: The request object; its ``name`` field identifies the
      custom class to fetch
    - retry: Retry configuration for failed requests
    - timeout: Request timeout in seconds
    - metadata: Additional metadata to send with the request

    Returns:
    CustomClass: The requested custom class
    """

def list_custom_classes(
    self,
    request: ListCustomClassesRequest,
    *,
    retry: OptionalRetry = None,
    timeout: Optional[float] = None,
    metadata: Sequence[Tuple[str, str]] = ()
) -> ListCustomClassesResponse:
    """
    List custom classes in a project.

    NOTE(review): the published client appears to return a pager that wraps
    ``ListCustomClassesResponse`` rather than the bare response - confirm
    against the installed library.

    Parameters:
    - request: The request object; ``parent`` selects the location to list,
      ``page_size`` and ``page_token`` control pagination
    - retry: Retry configuration for failed requests
    - timeout: Request timeout in seconds
    - metadata: Additional metadata to send with the request

    Returns:
    ListCustomClassesResponse: The custom classes (``custom_classes``) and a
    ``next_page_token``
    """

def update_custom_class(
    self,
    request: UpdateCustomClassRequest,
    *,
    retry: OptionalRetry = None,
    timeout: Optional[float] = None,
    metadata: Sequence[Tuple[str, str]] = ()
) -> CustomClass:
    """
    Update an existing custom class.

    Parameters:
    - request: The request object carrying the modified ``custom_class``
      and an ``update_mask``
    - retry: Retry configuration for failed requests
    - timeout: Request timeout in seconds
    - metadata: Additional metadata to send with the request

    Returns:
    CustomClass: The updated custom class
    """

def delete_custom_class(
    self,
    request: DeleteCustomClassRequest,
    *,
    retry: OptionalRetry = None,
    timeout: Optional[float] = None,
    metadata: Sequence[Tuple[str, str]] = ()
) -> None:
    """
    Delete a custom class.

    Parameters:
    - request: The request object; its ``name`` field identifies the
      custom class to delete
    - retry: Retry configuration for failed requests
    - timeout: Request timeout in seconds
    - metadata: Additional metadata to send with the request

    Returns:
    None
    """

Usage Examples

Creating and Using Phrase Sets

from google.cloud import speech

# Initialize clients
speech_client = speech.SpeechClient()
adaptation_client = speech.AdaptationClient()

# Create a phrase set for technical terms
phrase_set_request = speech.CreatePhraseSetRequest(
    parent="projects/your-project-id/locations/global",
    phrase_set_id="technical-terms",
    phrase_set=speech.PhraseSet(
        phrases=[
            speech.PhraseSet.Phrase(value="Kubernetes", boost=10.0),
            speech.PhraseSet.Phrase(value="Docker container", boost=10.0),
            speech.PhraseSet.Phrase(value="microservices", boost=5.0),
            speech.PhraseSet.Phrase(value="API endpoint", boost=5.0),
        ]
    )
)

phrase_set = adaptation_client.create_phrase_set(request=phrase_set_request)
print(f"Created phrase set: {phrase_set.name}")

# Use the phrase set in recognition.
# Adaptation resources are attached through RecognitionConfig.adaptation;
# SpeechContext only carries inline phrases and has no speech_adaptation
# field. A stored phrase set is referenced by its resource name through
# phrase_set_references (phrase_sets expects inline PhraseSet objects).
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
    adaptation=speech.SpeechAdaptation(
        phrase_set_references=[phrase_set.name]
    ),
)

# Load the audio to transcribe; it must match the encoding and sample rate
# declared in the config above (replace the path with your own file).
with open("path/to/audio.raw", "rb") as audio_file:
    audio_content = audio_file.read()

# Perform recognition with custom phrases
audio = speech.RecognitionAudio(content=audio_content)
response = speech_client.recognize(config=config, audio=audio)

Creating and Using Custom Classes

from google.cloud import speech

adaptation_client = speech.AdaptationClient()

# Register the product names as a reusable custom class
product_items = [
    speech.CustomClass.ClassItem(value=product)
    for product in ("TechWidget Pro", "DataSync Enterprise", "CloudFlow Analytics")
]
custom_class_request = speech.CreateCustomClassRequest(
    parent="projects/your-project-id/locations/global",
    custom_class_id="product-names",
    custom_class=speech.CustomClass(items=product_items),
)

custom_class = adaptation_client.create_custom_class(request=custom_class_request)
print(f"Created custom class: {custom_class.name}")

# Build phrases that embed the custom class. Each f-string renders the
# class as "${<custom class resource name>}" inside the phrase text.
configure_phrase = speech.PhraseSet.Phrase(
    value=f"I need to configure ${{{custom_class.name}}}",
    boost=15.0,
)
latest_version_phrase = speech.PhraseSet.Phrase(
    value=f"The latest version of ${{{custom_class.name}}}",
    boost=10.0,
)
phrase_set_with_class = speech.PhraseSet(
    phrases=[configure_phrase, latest_version_phrase]
)

# Store the phrase set that references the custom class
phrase_set_request = speech.CreatePhraseSetRequest(
    parent="projects/your-project-id/locations/global",
    phrase_set_id="product-phrases",
    phrase_set=phrase_set_with_class,
)

phrase_set = adaptation_client.create_phrase_set(request=phrase_set_request)

Managing Adaptation Resources

from google.cloud import speech

adaptation_client = speech.AdaptationClient()
project_path = "projects/your-project-id/locations/global"

# List all phrase sets.
# The client method is named list_phrase_set (singular). It returns a pager;
# iterating the pager walks every page of results, whereas reading
# `.phrase_sets` on it would only expose the first page.
list_request = speech.ListPhraseSetRequest(parent=project_path)
phrase_sets = list(adaptation_client.list_phrase_set(request=list_request))

print("Existing phrase sets:")
for phrase_set in phrase_sets:
    print(f"- {phrase_set.name}: {len(phrase_set.phrases)} phrases")

# List all custom classes (again iterating the pager to cover all pages)
class_request = speech.ListCustomClassesRequest(parent=project_path)
custom_classes = adaptation_client.list_custom_classes(request=class_request)

print("Existing custom classes:")
for custom_class in custom_classes:
    print(f"- {custom_class.name}: {len(custom_class.items)} items")

# Update a phrase set. Guard against a project with no phrase sets, where
# indexing the first element would raise IndexError.
if phrase_sets:
    existing_phrase_set = phrase_sets[0]
    existing_phrase_set.phrases.append(
        speech.PhraseSet.Phrase(value="new technical term", boost=8.0)
    )

    update_request = speech.UpdatePhraseSetRequest(phrase_set=existing_phrase_set)
    updated_phrase_set = adaptation_client.update_phrase_set(request=update_request)

Resource Types

PhraseSet

class PhraseSet:
    """A set of words or phrases to improve recognition accuracy."""

    # Nested types are declared first so the field annotations below can
    # refer to them.
    class Phrase:
        """Individual phrase with optional boost."""
        value: str
        boost: float

    class State:
        """Lifecycle state of the phrase set."""
        STATE_UNSPECIFIED = 0
        ACTIVE = 2
        DELETED = 4  # the API defines DELETED = 4; there is no DELETE_REQUESTED value

    name: str  # Resource name: projects/{project}/locations/{location}/phraseSets/{phrase_set_id}
    phrases: Sequence[Phrase]
    boost: float  # Set-level boost (NOTE(review): presumably applies to phrases without their own boost - confirm)
    display_name: str
    uid: str
    create_time: Timestamp
    update_time: Timestamp
    delete_time: Timestamp
    expire_time: Timestamp
    annotations: Mapping[str, str]
    etag: str
    reconciling: bool
    state: State
    kms_key_name: str
    kms_key_version_name: str

CustomClass

class CustomClass:
    """A custom class for domain-specific vocabulary."""

    # Nested types are declared first so the field annotations below can
    # refer to them.
    class ClassItem:
        """Individual item in a custom class."""
        value: str

    class State:
        """Lifecycle state of the custom class."""
        STATE_UNSPECIFIED = 0
        ACTIVE = 2
        DELETED = 4  # the API defines DELETED = 4; there is no DELETE_REQUESTED value

    name: str  # Resource name: projects/{project}/locations/{location}/customClasses/{custom_class_id}
    custom_class_id: str
    items: Sequence[ClassItem]
    display_name: str
    uid: str
    create_time: Timestamp
    update_time: Timestamp
    delete_time: Timestamp
    expire_time: Timestamp
    annotations: Mapping[str, str]
    etag: str
    reconciling: bool
    state: State
    kms_key_name: str
    kms_key_version_name: str

SpeechAdaptation

class SpeechAdaptation:
    """Configuration for speech adaptation.

    Phrase sets can be supplied inline (``phrase_sets``) or, when already
    stored through the adaptation client, by resource name
    (``phrase_set_references``).
    """
    phrase_sets: Sequence[PhraseSet]       # Inline phrase sets defined in the request
    phrase_set_references: Sequence[str]   # Resource names of stored phrase sets
    custom_classes: Sequence[CustomClass]  # Inline custom classes
    abnf_grammar: ABNFGrammar              # ABNF grammar specification

Request/Response Types

Phrase Set Requests

class CreatePhraseSetRequest:
    """Request to create a phrase set."""
    parent: str  # Parent location, e.g. "projects/your-project-id/locations/global"
    phrase_set_id: str  # ID for the new phrase set, e.g. "technical-terms"
    phrase_set: PhraseSet  # The phrase set to create

class GetPhraseSetRequest:
    """Request to retrieve a phrase set."""
    name: str  # Resource name: projects/{project}/locations/{location}/phraseSets/{phrase_set_id}

class ListPhraseSetRequest:
    """Request to list phrase sets."""
    parent: str  # Parent location, e.g. "projects/your-project-id/locations/global"
    page_size: int  # NOTE(review): presumably the maximum number of results per page - confirm
    page_token: str  # NOTE(review): presumably the next_page_token from a previous response - confirm

class ListPhraseSetResponse:
    """Response containing phrase sets."""
    phrase_sets: Sequence[PhraseSet]  # The phrase sets returned for this page
    next_page_token: str  # NOTE(review): presumably empty when there are no further pages - confirm

class UpdatePhraseSetRequest:
    """Request to update a phrase set."""
    phrase_set: PhraseSet  # The phrase set carrying the new values
    update_mask: FieldMask  # NOTE(review): presumably selects the fields to update; the usage example omits it - confirm

class DeletePhraseSetRequest:
    """Request to delete a phrase set."""
    name: str  # Resource name: projects/{project}/locations/{location}/phraseSets/{phrase_set_id}

Custom Class Requests

class CreateCustomClassRequest:
    """Request to create a custom class."""
    parent: str  # Parent location, e.g. "projects/your-project-id/locations/global"
    custom_class_id: str  # ID for the new custom class, e.g. "product-names"
    custom_class: CustomClass  # The custom class to create

class GetCustomClassRequest:
    """Request to retrieve a custom class."""
    name: str  # Resource name: projects/{project}/locations/{location}/customClasses/{custom_class_id}

class ListCustomClassesRequest:
    """Request to list custom classes."""
    parent: str  # Parent location, e.g. "projects/your-project-id/locations/global"
    page_size: int  # NOTE(review): presumably the maximum number of results per page - confirm
    page_token: str  # NOTE(review): presumably the next_page_token from a previous response - confirm

class ListCustomClassesResponse:
    """Response containing custom classes."""
    custom_classes: Sequence[CustomClass]  # The custom classes returned for this page
    next_page_token: str  # NOTE(review): presumably empty when there are no further pages - confirm

class UpdateCustomClassRequest:
    """Request to update a custom class."""
    custom_class: CustomClass  # The custom class carrying the new values
    update_mask: FieldMask  # NOTE(review): presumably selects the fields to update - confirm

class DeleteCustomClassRequest:
    """Request to delete a custom class."""
    name: str  # Resource name: projects/{project}/locations/{location}/customClasses/{custom_class_id}

Best Practices

Phrase Set Optimization

# Effective phrase set design: give each phrase a boost proportional to how
# strongly recognition should favor it.
phrase_set = speech.PhraseSet(
    phrases=[
        # Use realistic boost values (1.0-20.0); higher means more important
        speech.PhraseSet.Phrase(value="critical term", boost=15.0),
        speech.PhraseSet.Phrase(value="important phrase", boost=10.0),
        speech.PhraseSet.Phrase(value="common word", boost=5.0),

        # Include spelling variants of the same word (e.g. regional spellings)
        speech.PhraseSet.Phrase(value="color", boost=5.0),
        speech.PhraseSet.Phrase(value="colour", boost=5.0),

        # Use context-specific, multi-word phrases from the target domain
        speech.PhraseSet.Phrase(value="machine learning model", boost=10.0),
        speech.PhraseSet.Phrase(value="neural network", boost=8.0),
    ]
)

Resource Naming Conventions

# Coordinates shared by every adaptation resource
location = "global"  # a specific region such as "us-central1" also works
project_id = "your-project-id"

# Resource IDs: descriptive, lowercase, hyphen-separated
custom_class_id = "drug-names"
phrase_set_id = "medical-terminology"

# Fully qualified resource names built from the pieces above
custom_class_name = f"projects/{project_id}/locations/{location}/customClasses/{custom_class_id}"
phrase_set_name = f"projects/{project_id}/locations/{location}/phraseSets/{phrase_set_id}"

Adaptation Performance

# Limit the number of adaptations for optimal performance
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
    # Stored phrase sets are attached through the config-level `adaptation`
    # field and referenced by resource name; SpeechContext has no
    # speech_adaptation field.
    adaptation=speech.SpeechAdaptation(
        phrase_set_references=[
            "projects/project/locations/global/phraseSets/set1",
            "projects/project/locations/global/phraseSets/set2",
            # Limit to 2-3 phrase sets for best performance
        ]
    ),
    # Alternative: use direct phrase contexts for simple cases
    # speech_contexts=[
    #     speech.SpeechContext(
    #         phrases=["simple", "phrase", "list"]
    #     )
    # ],
)

Install with Tessl CLI

npx tessl i tessl/pypi-google-cloud-speech

docs

advanced-features.md

index.md

speech-adaptation.md

speech-recognition.md

streaming-recognition.md

types-and-configuration.md

tile.json