# Audio Processing

Comprehensive audio capabilities including speech-to-text transcription, translation, and text-to-speech synthesis. The audio API provides high-quality processing for various audio formats and use cases.

## Capabilities

### Speech-to-Text Transcription

Convert audio files to text with high accuracy and support for multiple languages and formats.

```python { .api }
def transcribe(
    file: FileTypes,
    model: str,
    language: Optional[str] = NOT_GIVEN,
    prompt: Optional[str] = NOT_GIVEN,
    response_format: Optional[Literal["json", "text", "srt", "verbose_json", "vtt"]] = NOT_GIVEN,
    temperature: Optional[float] = NOT_GIVEN,
    timestamp_granularities: Optional[List[Literal["word", "segment"]]] = NOT_GIVEN,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> TranscriptionResponse:
    """
    Transcribe audio to text.

    Parameters:
    - file: Audio file to transcribe (various formats supported)
    - model: Model to use for transcription
    - language: Language of the input audio (ISO-639-1 format)
    - prompt: Optional text prompt to guide the model's style
    - response_format: Format of the transcript output
    - temperature: Sampling temperature between 0 and 1
    - timestamp_granularities: Timestamp granularities to populate

    Returns:
    TranscriptionResponse with transcribed text and optional metadata
    """
```

### Speech Translation

Translate audio from various languages to English text.

```python { .api }
def translate(
    file: FileTypes,
    model: str,
    prompt: Optional[str] = NOT_GIVEN,
    response_format: Optional[Literal["json", "text", "srt", "verbose_json", "vtt"]] = NOT_GIVEN,
    temperature: Optional[float] = NOT_GIVEN,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> TranslationResponse:
    """
    Translate audio to English text.

    Parameters:
    - file: Audio file to translate (various formats supported)
    - model: Model to use for translation
    - prompt: Optional text prompt to guide the model's style
    - response_format: Format of the transcript output
    - temperature: Sampling temperature between 0 and 1

    Returns:
    TranslationResponse with translated English text and optional metadata
    """
```

### Text-to-Speech Synthesis

Generate spoken audio from text input with various voice options.

```python { .api }
def speech(
    input: str,
    model: str,
    voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
    response_format: Optional[Literal["mp3", "opus", "aac", "flac", "wav", "pcm"]] = NOT_GIVEN,
    speed: Optional[float] = NOT_GIVEN,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN
) -> bytes:
    """
    Generate audio from text.

    Parameters:
    - input: Text to convert to audio
    - model: Model to use for speech synthesis
    - voice: Voice to use for the generated audio
    - response_format: Audio format for the output
    - speed: Speed of the generated audio (0.25 to 4.0)

    Returns:
    Raw audio bytes in the specified format
    """
```

### Async Audio Operations

All audio operations have asynchronous counterparts with identical parameters.

```python { .api }
async def transcribe(file: FileTypes, model: str, **kwargs) -> TranscriptionResponse: ...
async def translate(file: FileTypes, model: str, **kwargs) -> TranslationResponse: ...
async def speech(input: str, model: str, voice: str, **kwargs) -> bytes: ...
```

## Usage Examples

### Audio Transcription

```python
from groq import Groq

client = Groq()

# Transcribe an audio file
with open("audio.mp3", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        file=audio_file,
        model="whisper-large-v3",
        language="en",
        response_format="text"
    )

print("Transcript:", transcript)

# With detailed response format
with open("audio.wav", "rb") as audio_file:
    response = client.audio.transcriptions.create(
        file=audio_file,
        model="whisper-large-v3",
        response_format="verbose_json",
        timestamp_granularities=["word", "segment"]
    )

print("Text:", response.text)
print("Language:", response.language)
for segment in response.segments:
    print(f"[{segment.start:.2f}s - {segment.end:.2f}s]: {segment.text}")
```

### Audio Translation

```python
from groq import Groq

client = Groq()

# Translate non-English audio to English
with open("spanish_audio.mp3", "rb") as audio_file:
    translation = client.audio.translations.create(
        file=audio_file,
        model="whisper-large-v3",
        response_format="text"
    )

print("English translation:", translation)

# With JSON response format
with open("french_audio.wav", "rb") as audio_file:
    response = client.audio.translations.create(
        file=audio_file,
        model="whisper-large-v3",
        response_format="json"
    )

print("Translated text:", response.text)
```

### Text-to-Speech

```python
from groq import Groq

client = Groq()

# Generate speech from text
response = client.audio.speech.create(
    input="Hello, this is a test of the text-to-speech functionality.",
    model="tts-1",
    voice="nova",
    response_format="mp3"
)

# Save the audio to a file
with open("output.mp3", "wb") as audio_file:
    audio_file.write(response)

# Different voice and format
response = client.audio.speech.create(
    input="This is a different voice and format example.",
    model="tts-1-hd",
    voice="alloy",
    response_format="wav",
    speed=1.2
)

with open("output.wav", "wb") as audio_file:
    audio_file.write(response)
```

### Using file_from_path Utility

```python
from groq import Groq, file_from_path

client = Groq()

# Use the utility function for file handling
audio_file = file_from_path("path/to/audio.mp3")
transcript = client.audio.transcriptions.create(
    file=audio_file,
    model="whisper-large-v3"
)

print(transcript)
```

### Async Usage

```python
import asyncio
from groq import AsyncGroq

async def main():
    client = AsyncGroq()

    # Async transcription
    with open("audio.mp3", "rb") as audio_file:
        transcript = await client.audio.transcriptions.create(
            file=audio_file,
            model="whisper-large-v3",
            response_format="text"
        )

    print("Transcript:", transcript)

    # Async text-to-speech
    speech_response = await client.audio.speech.create(
        input="Async text-to-speech example",
        model="tts-1",
        voice="echo"
    )

    with open("async_output.mp3", "wb") as f:
        f.write(speech_response)

asyncio.run(main())
```

## Types

### File Types

```python { .api }
FileTypes = Union[IO[bytes], bytes, PathLike, str]
```

### Response Types

```python { .api }
class TranscriptionResponse:
    text: str

class TranslationResponse:
    text: str

# Verbose response format (when response_format="verbose_json")
class TranscriptionVerboseResponse:
    text: str
    language: str
    duration: float
    segments: List[TranscriptionSegment]
    words: Optional[List[TranscriptionWord]]

class TranscriptionSegment:
    id: int
    seek: int
    start: float
    end: float
    text: str
    tokens: List[int]
    temperature: float
    avg_logprob: float
    compression_ratio: float
    no_speech_prob: float

class TranscriptionWord:
    word: str
    start: float
    end: float
```

### Request Parameter Types

```python { .api }
class TranscriptionCreateParams:
    file: FileTypes
    model: str
    language: Optional[str]
    prompt: Optional[str]
    response_format: Optional[Literal["json", "text", "srt", "verbose_json", "vtt"]]
    temperature: Optional[float]
    timestamp_granularities: Optional[List[Literal["word", "segment"]]]

class TranslationCreateParams:
    file: FileTypes
    model: str
    prompt: Optional[str]
    response_format: Optional[Literal["json", "text", "srt", "verbose_json", "vtt"]]
    temperature: Optional[float]

class SpeechCreateParams:
    input: str
    model: str
    voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
    response_format: Optional[Literal["mp3", "opus", "aac", "flac", "wav", "pcm"]]
    speed: Optional[float]
```