The official Python library for the groq API
npx @tessl/cli install tessl/pypi-groq@0.31.0
The official Python library for the Groq API, providing access to Groq's high-performance language models and services. Groq enables fast inference for large language models with comprehensive support for chat completions, embeddings, audio processing, and more.
pip install groq
import groq
Standard client usage:
from groq import Groq
Async client usage:
from groq import AsyncGroq
Type imports:
from groq import types
import os
from groq import Groq
# Initialize client with API key from environment
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# Create a chat completion
completion = client.chat.completions.create(
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello! How are you?"}
],
model="llama3-8b-8192",
max_tokens=100,
temperature=0.7
)
print(completion.choices[0].message.content)
# Stream a chat completion
stream = client.chat.completions.create(
messages=[
{"role": "user", "content": "Write a short poem about Python"}
],
model="llama3-8b-8192",
stream=True
)
for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")
The Groq library follows a structured client-resource pattern:
Groq (sync) and AsyncGroq (async) serve as entry points.
High-performance chat completions with streaming support, function calling, tool usage, and advanced features like reasoning modes and search integration.
def create(
messages: Iterable[ChatCompletionMessageParam],
model: str,
max_tokens: Optional[int] = None,
temperature: Optional[float] = None,
stream: Optional[bool] = None,
tools: Optional[Iterable[ChatCompletionToolParam]] = None,
**kwargs
) -> ChatCompletion | Stream[ChatCompletionChunk]: ...
Generate high-quality vector embeddings for text inputs, supporting both single strings and batch processing.
def create(
input: Union[str, List[str]],
model: Union[str, Literal["nomic-embed-text-v1_5"]],
**kwargs
) -> CreateEmbeddingResponse: ...
Comprehensive audio capabilities including speech-to-text transcription, translation, and text-to-speech synthesis.
# client.audio.transcriptions.create()
def create(model: str, file: FileTypes, **kwargs) -> Transcription: ...
# client.audio.translations.create()
def create(model: str, file: FileTypes, **kwargs) -> Translation: ...
# client.audio.speech.create()
def create(input: str, model: str, voice: str, **kwargs) -> bytes: ...
Access model information, list available models, and manage model lifecycle operations.
def list(**kwargs) -> ModelListResponse: ...
def retrieve(model: str, **kwargs) -> Model: ...
def delete(model: str, **kwargs) -> ModelDeleted: ...
Upload, manage, and organize files for use with various Groq services and batch processing.
def create(file: FileTypes, purpose: str, **kwargs) -> FileCreateResponse: ...
def list(**kwargs) -> FileListResponse: ...
def retrieve(file_id: str, **kwargs) -> FileInfoResponse: ...
def delete(file_id: str, **kwargs) -> FileDeleteResponse: ...
Submit and manage batch jobs for processing large volumes of requests efficiently.
def create(input_file_id: str, endpoint: str, completion_window: str, **kwargs) -> BatchCreateResponse: ...
def retrieve(batch_id: str, **kwargs) -> BatchRetrieveResponse: ...
def cancel(batch_id: str, **kwargs) -> BatchCancelResponse: ...
def list(**kwargs) -> BatchListResponse: ...
class Groq:
    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | httpx.URL | None = None,
        timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
        max_retries: int = DEFAULT_MAX_RETRIES,
        default_headers: Mapping[str, str] | None = None,
        default_query: Mapping[str, object] | None = None,
        http_client: httpx.Client | None = None,
        _strict_response_validation: bool = False
    ): ...
class AsyncGroq:
    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | httpx.URL | None = None,
        timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
        max_retries: int = DEFAULT_MAX_RETRIES,
        default_headers: Mapping[str, str] | None = None,
        default_query: Mapping[str, object] | None = None,
        http_client: httpx.AsyncClient | None = None,
        _strict_response_validation: bool = False
    ): ...
class GroqError(Exception): ...
class APIError(GroqError): ...
class APIStatusError(APIError): ...
class APITimeoutError(APIConnectionError): ...
class APIConnectionError(APIError): ...
class APIResponseValidationError(APIError): ...
class BadRequestError(APIStatusError): ...
class AuthenticationError(APIStatusError): ...
class PermissionDeniedError(APIStatusError): ...
class NotFoundError(APIStatusError): ...
class ConflictError(APIStatusError): ...
class UnprocessableEntityError(APIStatusError): ...
class RateLimitError(APIStatusError): ...
class InternalServerError(APIStatusError): ...
class BaseModel: ...
class NotGiven: ...
NOT_GIVEN: NotGiven
class Timeout: ...
class RequestOptions: ...
# Response wrappers
class APIResponse: ...
class AsyncAPIResponse: ...
class Stream: ...
class AsyncStream: ...
# HTTP clients
class DefaultHttpxClient: ...
class DefaultAsyncHttpxClient: ...
class DefaultAioHttpClient: ...
DEFAULT_TIMEOUT: httpx.Timeout
DEFAULT_MAX_RETRIES: int
DEFAULT_CONNECTION_LIMITS: httpx.Limits
def file_from_path(path: str) -> FileTypes: ...