The official Python library for the Groq API.
High-performance chat completions with streaming support, function calling, tool usage, and advanced features like reasoning modes and search integration. The chat completions API provides both synchronous and asynchronous interfaces for generating conversational AI responses.
Generate chat completions with comprehensive configuration options, supporting both streaming and non-streaming responses.
def create(
    messages: Iterable[ChatCompletionMessageParam],
    model: str,
    exclude_domains: Optional[List[str]] = NOT_GIVEN,
    frequency_penalty: Optional[float] = NOT_GIVEN,
    function_call: Optional[FunctionCall] = NOT_GIVEN,
    functions: Optional[Iterable[Function]] = NOT_GIVEN,
    include_domains: Optional[List[str]] = NOT_GIVEN,
    include_reasoning: Optional[bool] = NOT_GIVEN,
    logit_bias: Optional[Dict[str, int]] = NOT_GIVEN,
    logprobs: Optional[bool] = NOT_GIVEN,
    max_completion_tokens: Optional[int] = NOT_GIVEN,
    max_tokens: Optional[int] = NOT_GIVEN,
    metadata: Optional[Dict[str, str]] = NOT_GIVEN,
    n: Optional[int] = NOT_GIVEN,
    parallel_tool_calls: Optional[bool] = NOT_GIVEN,
    presence_penalty: Optional[float] = NOT_GIVEN,
    reasoning_effort: Optional[Literal["none", "default", "low", "medium", "high"]] = NOT_GIVEN,
    reasoning_format: Optional[Literal["hidden", "raw", "parsed"]] = NOT_GIVEN,
    response_format: Optional[ResponseFormat] = NOT_GIVEN,
    search_settings: Optional[SearchSettings] = NOT_GIVEN,
    seed: Optional[int] = NOT_GIVEN,
    service_tier: Optional[Literal["auto", "on_demand", "flex", "performance"]] = NOT_GIVEN,
    stop: Union[Optional[str], List[str], None] = NOT_GIVEN,
    store: Optional[bool] = NOT_GIVEN,
    stream: Optional[bool] = NOT_GIVEN,
    temperature: Optional[float] = NOT_GIVEN,
    tool_choice: Optional[ChatCompletionToolChoiceOptionParam] = NOT_GIVEN,
    tools: Optional[Iterable[ChatCompletionToolParam]] = NOT_GIVEN,
    top_logprobs: Optional[int] = NOT_GIVEN,
    top_p: Optional[float] = NOT_GIVEN,
    user: Optional[str] = NOT_GIVEN,
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ChatCompletion | Stream[ChatCompletionChunk]:
    """
    Create a chat completion with the specified messages and configuration.

    Parameters:
    - messages: List of conversation messages with roles and content
    - model: Model identifier to use for the completion
    - exclude_domains: Domains to exclude from search results
    - frequency_penalty: Penalize tokens based on their frequency in the text so far
    - function_call: Control which function is called (deprecated, use tools)
    - functions: List of functions the model may call (deprecated, use tools)
    - include_domains: Domains to include in search results
    - include_reasoning: Whether to include reasoning in the response
    - logit_bias: Modify likelihood of specified tokens appearing
    - logprobs: Whether to return log probabilities
    - max_completion_tokens: Maximum number of completion tokens to generate
    - max_tokens: Maximum number of tokens to generate (deprecated, use max_completion_tokens)
    - metadata: Optional metadata to attach to the request
    - n: Number of completions to generate for each prompt
    - parallel_tool_calls: Whether to enable parallel function calling
    - presence_penalty: Penalize tokens based on whether they appear in the text
    - reasoning_effort: Level of reasoning effort for models that support reasoning
    - reasoning_format: Format for reasoning output ("hidden", "raw", or "parsed")
    - response_format: Format specification for the response
    - search_settings: Configuration for search functionality
    - seed: Random seed for deterministic sampling
    - service_tier: Service quality tier
    - stop: Sequences where the API will stop generating tokens
    - store: Whether to store the conversation for model training
    - stream: Whether to stream back partial progress
    - temperature: Sampling temperature between 0 and 2
    - tool_choice: Controls which tool is called by the model
    - tools: List of tools the model may call
    - top_logprobs: Number of most likely tokens to return at each position
    - top_p: Nucleus sampling parameter
    - user: Unique identifier representing your end-user

    Returns:
        ChatCompletion for non-streaming requests, or
        Stream[ChatCompletionChunk] for streaming requests.
    """
    ...
async def create(
    messages: Iterable[ChatCompletionMessageParam],
    model: str,
    **kwargs,
) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
    """
    Asynchronous version of chat completion creation with identical
    parameters and functionality as the synchronous create().

    Returns:
        ChatCompletion for non-streaming requests, or
        AsyncStream[ChatCompletionChunk] for streaming requests.
    """
    ...
# Example: basic (non-streaming) chat completion.
from groq import Groq

client = Groq()
completion = client.chat.completions.create(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"},
    ],
    model="llama3-8b-8192",
    max_tokens=100,
    temperature=0.7,
)
print(completion.choices[0].message.content)
# Example: streaming chat completion; chunks arrive incrementally.
from groq import Groq

client = Groq()
stream = client.chat.completions.create(
    messages=[
        {"role": "user", "content": "Write a short story about a robot."}
    ],
    model="llama3-8b-8192",
    stream=True,
    max_tokens=500,
)
for chunk in stream:
    # The final chunk's delta may carry no content.
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")
# Example: tool use (function calling).
from groq import Groq

client = Groq()
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    }
                },
                "required": ["location"],
            },
        },
    }
]
completion = client.chat.completions.create(
    messages=[
        {"role": "user", "content": "What's the weather like in New York?"}
    ],
    model="llama3-8b-8192",
    tools=tools,
    tool_choice="auto",
)
# Check if the model wants to call a function
if completion.choices[0].message.tool_calls:
    tool_call = completion.choices[0].message.tool_calls[0]
    print(f"Function to call: {tool_call.function.name}")
    print(f"Arguments: {tool_call.function.arguments}")
# Example: asynchronous chat completion with AsyncGroq.
import asyncio

from groq import AsyncGroq


async def main():
    client = AsyncGroq()
    completion = await client.chat.completions.create(
        messages=[
            {"role": "user", "content": "Explain quantum computing briefly."}
        ],
        model="llama3-8b-8192",
        max_tokens=200,
    )
    print(completion.choices[0].message.content)


asyncio.run(main())
class ChatCompletionMessage:
    # Assistant-side message as returned by the API.
    content: Optional[str]
    role: Literal["assistant", "system", "user", "tool", "function"]
    function_call: Optional[FunctionCall]  # deprecated in favor of tool_calls
    tool_calls: Optional[List[ChatCompletionMessageToolCall]]


class ChatCompletionMessageParam:
    # Base shape for request-side messages; subclasses narrow `role`.
    role: str
    content: Optional[str]


class ChatCompletionSystemMessageParam(ChatCompletionMessageParam):
    role: Literal["system"]
    content: str


class ChatCompletionUserMessageParam(ChatCompletionMessageParam):
    role: Literal["user"]
    # User content may be plain text or a list of content parts (text/image).
    content: Union[str, List[ChatCompletionContentPartParam]]


class ChatCompletionAssistantMessageParam(ChatCompletionMessageParam):
    role: Literal["assistant"]
    content: Optional[str]
    function_call: Optional[ChatCompletionMessageToolCallParam]
    tool_calls: Optional[List[ChatCompletionMessageToolCallParam]]


class ChatCompletionToolMessageParam(ChatCompletionMessageParam):
    role: Literal["tool"]
    content: str
    tool_call_id: str  # ties the tool result back to the originating call


class ChatCompletionFunctionMessageParam(ChatCompletionMessageParam):
    role: Literal["function"]
    content: str
    name: str
class ChatCompletionContentPartTextParam:
    # Text fragment of a multi-part user message.
    type: Literal["text"]
    text: str


class ChatCompletionContentPartImageParam:
    # Image fragment of a multi-part user message.
    type: Literal["image_url"]
    image_url: Dict[str, str]


# A content part is either a text part or an image part.
ChatCompletionContentPartParam = Union[
    ChatCompletionContentPartTextParam,
    ChatCompletionContentPartImageParam,
]
class ChatCompletionToolParam:
    # Request-side tool declaration; only "function" tools are supported.
    type: Literal["function"]
    function: FunctionDefinition


class ChatCompletionMessageToolCall:
    # Response-side tool call emitted by the model.
    id: str
    type: Literal["function"]
    function: Function


class ChatCompletionMessageToolCallParam:
    # Request-side echo of a prior tool call (e.g. in assistant history).
    id: str
    type: Literal["function"]
    function: Function


class ChatCompletionToolChoiceOptionParam:
    # Forces the model to call a specific named tool.
    type: Literal["function"]
    function: ChatCompletionNamedToolChoiceParam


class ChatCompletionNamedToolChoiceParam:
    name: str
class ChatCompletion:
    # Full (non-streaming) completion response.
    id: str
    choices: List[Choice]
    created: int  # Unix timestamp
    model: str
    object: Literal["chat.completion"]
    usage: Optional[CompletionUsage]


class Choice:
    # One generated alternative within a ChatCompletion.
    finish_reason: Optional[Literal["stop", "length", "tool_calls", "content_filter", "function_call"]]
    index: int
    logprobs: Optional[ChoiceLogprobs]
    message: ChatCompletionMessage


class ChatCompletionChunk:
    # One streamed fragment of a completion response.
    id: str
    choices: List[ChoiceDelta]
    created: int  # Unix timestamp
    model: str
    object: Literal["chat.completion.chunk"]
    usage: Optional[CompletionUsage]


class ChoiceDelta:
    # One generated alternative within a streamed chunk.
    delta: Delta
    finish_reason: Optional[str]
    index: int
    logprobs: Optional[ChoiceLogprobs]


class Delta:
    # Incremental message content carried by a chunk; fields are None
    # when the chunk does not update them.
    content: Optional[str]
    function_call: Optional[ChoiceDeltaFunctionCall]
    role: Optional[Literal["system", "user", "assistant", "tool"]]
    tool_calls: Optional[List[ChoiceDeltaToolCall]]
class CompletionUsage:
    # Token accounting for a completed request.
    completion_tokens: int
    prompt_tokens: int
    total_tokens: int
class FunctionDefinition:
    # Declares a callable function the model may invoke.
    name: str
    description: Optional[str]
    parameters: Optional[FunctionParameters]


class FunctionParameters:
    # JSON Schema object defining function parameters.
    type: str
    properties: Optional[Dict[str, Any]]
    required: Optional[List[str]]
class ResponseFormat:
    # Selects plain-text or JSON-object output mode.
    type: Literal["text", "json_object"]


class SearchSettings:
    # Configuration for search functionality.
    max_results: Optional[int]
    domains: Optional[List[str]]
Install with the Tessl CLI: npx tessl i tessl/pypi-groq