An integration package connecting Groq's Language Processing Unit (LPU) with LangChain for high-performance AI inference.

```bash
npx @tessl/cli install tessl/pypi-langchain-groq@0.3.0
```

This package provides seamless access to Groq's deterministic, single-core streaming architecture that delivers predictable and repeatable performance for GenAI inference workloads.

Install the package:

```bash
pip install langchain-groq
```

Import the chat model:

```python
from langchain_groq import ChatGroq
```

Import version information:

```python
from langchain_groq import __version__
```

Basic usage:

```python
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage, SystemMessage
# Basic initialization
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0.0,
    api_key="your-groq-api-key"  # or set GROQ_API_KEY env var
)
# Simple conversation
messages = [
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content="What is the capital of France?")
]
response = llm.invoke(messages)
print(response.content)
# Streaming response
for chunk in llm.stream(messages):
    print(chunk.content, end="", flush=True)
```

LangChain Groq integrates with the LangChain ecosystem through the standard BaseChatModel interface.
The package follows LangChain's standard patterns while leveraging Groq's unique deterministic architecture for reproducible results across inference runs.
Initialize the ChatGroq model with comprehensive configuration options for performance, behavior, and API settings.

```python
class ChatGroq:
    def __init__(
        self,
        model: str,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        stop: Optional[Union[List[str], str]] = None,
        reasoning_format: Optional[Literal["parsed", "raw", "hidden"]] = None,
        reasoning_effort: Optional[str] = None,
        service_tier: Literal["on_demand", "flex", "auto"] = "on_demand",
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        timeout: Union[float, Tuple[float, float], Any, None] = None,
        max_retries: int = 2,
        streaming: bool = False,
        n: int = 1,
        model_kwargs: Dict[str, Any] = {},
        default_headers: Union[Mapping[str, str], None] = None,
        default_query: Union[Mapping[str, object], None] = None,
        http_client: Union[Any, None] = None,
        http_async_client: Union[Any, None] = None,
        **kwargs: Any
    ) -> None:
"""
Initialize ChatGroq model.
Parameters:
- model: Name of Groq model (e.g., "llama-3.1-8b-instant")
Note: Aliased to internal field 'model_name'
- temperature: Sampling temperature (0.0 to 1.0)
- max_tokens: Maximum tokens to generate
- stop: Stop sequences (string or list of strings)
Note: Aliased to internal field 'stop_sequences'
- reasoning_format: Format for reasoning output ("parsed", "raw", "hidden")
- reasoning_effort: Level of reasoning effort
- service_tier: Service tier ("on_demand", "flex", "auto")
- api_key: Groq API key (defaults to GROQ_API_KEY env var)
Note: Aliased to internal field 'groq_api_key'
- base_url: Custom API base URL
Note: Aliased to internal field 'groq_api_base'
- timeout: Request timeout in seconds
Note: Aliased to internal field 'request_timeout'
- max_retries: Maximum retry attempts
- streaming: Enable streaming responses
- n: Number of completions to generate
- model_kwargs: Additional model parameters
- default_headers: Default HTTP headers
- default_query: Default query parameters
- http_client: Custom httpx client for sync requests
- http_async_client: Custom httpx client for async requests
"""Generate responses using synchronous methods for immediate results and batch processing.
Generate responses using synchronous methods for immediate results and batch processing.

```python
def invoke(
    self,
    input: LanguageModelInput,
    config: Optional[RunnableConfig] = None,
    **kwargs: Any
) -> BaseMessage:
    """
    Generate a single response from input messages.

    Parameters:
    - input: Messages (list of BaseMessage) or string
    - config: Runtime configuration
    - **kwargs: Additional parameters

    Returns:
    BaseMessage: Generated response message
    """

def batch(
    self,
    inputs: List[LanguageModelInput],
    config: Optional[Union[RunnableConfig, List[RunnableConfig]]] = None,
    **kwargs: Any
) -> List[BaseMessage]:
    """
    Process multiple inputs in batch.

    Parameters:
    - inputs: List of message sequences or strings
    - config: Runtime configuration(s)
    - **kwargs: Additional parameters

    Returns:
    List[BaseMessage]: List of generated responses
    """

def stream(
    self,
    input: LanguageModelInput,
    config: Optional[RunnableConfig] = None,
    **kwargs: Any
) -> Iterator[BaseMessageChunk]:
    """
    Stream response tokens as they're generated.

    Parameters:
    - input: Messages (list of BaseMessage) or string
    - config: Runtime configuration
    - **kwargs: Additional parameters

    Yields:
    BaseMessageChunk: Individual response chunks
    """

def generate(
    self,
    messages: List[List[BaseMessage]],
    stop: Optional[List[str]] = None,
    callbacks: Optional[Union[List[BaseCallbackHandler], BaseCallbackManager]] = None,
    **kwargs: Any
) -> LLMResult:
    """
    Legacy generate method returning detailed results.

    Parameters:
    - messages: List of message sequences
    - stop: Stop sequences
    - callbacks: Callback handlers
    - **kwargs: Additional parameters

    Returns:
    LLMResult: Detailed generation results with metadata
"""Generate responses using asynchronous methods for concurrent processing and high-throughput applications.
Generate responses using asynchronous methods for concurrent processing and high-throughput applications.

```python
async def ainvoke(
    self,
    input: LanguageModelInput,
    config: Optional[RunnableConfig] = None,
    **kwargs: Any
) -> BaseMessage:
    """
    Asynchronously generate a single response.

    Parameters:
    - input: Messages (list of BaseMessage) or string
    - config: Runtime configuration
    - **kwargs: Additional parameters

    Returns:
    BaseMessage: Generated response message
    """

async def abatch(
    self,
    inputs: List[LanguageModelInput],
    config: Optional[Union[RunnableConfig, List[RunnableConfig]]] = None,
    **kwargs: Any
) -> List[BaseMessage]:
    """
    Asynchronously process multiple inputs in batch.

    Parameters:
    - inputs: List of message sequences or strings
    - config: Runtime configuration(s)
    - **kwargs: Additional parameters

    Returns:
    List[BaseMessage]: List of generated responses
    """

async def astream(
    self,
    input: LanguageModelInput,
    config: Optional[RunnableConfig] = None,
    **kwargs: Any
) -> AsyncIterator[BaseMessageChunk]:
    """
    Asynchronously stream response tokens.

    Parameters:
    - input: Messages (list of BaseMessage) or string
    - config: Runtime configuration
    - **kwargs: Additional parameters

    Yields:
    BaseMessageChunk: Individual response chunks
    """

async def agenerate(
    self,
    messages: List[List[BaseMessage]],
    stop: Optional[List[str]] = None,
    callbacks: Optional[Union[List[BaseCallbackHandler], BaseCallbackManager]] = None,
    **kwargs: Any
) -> LLMResult:
    """
    Asynchronously generate with detailed results.

    Parameters:
    - messages: List of message sequences
    - stop: Stop sequences
    - callbacks: Callback handlers
    - **kwargs: Additional parameters

    Returns:
    LLMResult: Detailed generation results with metadata
"""Bind tools and functions to enable function calling capabilities with the Groq model.
Bind tools and functions to enable function calling capabilities with the Groq model.

```python
def bind_tools(
    self,
    tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
    *,
    tool_choice: Optional[Union[Dict, str, Literal["auto", "any", "none"], bool]] = None,
    **kwargs: Any
) -> Runnable[LanguageModelInput, BaseMessage]:
    """
    Bind tools for function calling.

    Parameters:
    - tools: List of tool definitions (Pydantic models, functions, or dicts)
    - tool_choice: Tool selection strategy
      - "auto": Model chooses whether to call tools
      - "any"/"required": Model must call a tool
      - "none": Disable tool calling
      - str: Specific tool name to call
      - bool: True requires single tool call
      - dict: {"type": "function", "function": {"name": "tool_name"}}
    - **kwargs: Additional binding parameters

    Returns:
    Runnable: Model with bound tools
    """

def bind_functions(
    self,
    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
    function_call: Optional[Union[Dict, str, Literal["auto", "none"]]] = None,
    **kwargs: Any
) -> Runnable[LanguageModelInput, BaseMessage]:
    """
    [DEPRECATED] Bind functions for function calling. Use bind_tools instead.

    This method is deprecated since version 0.2.1 and will be removed in 1.0.0.
    Use bind_tools() for new development.

    Parameters:
    - functions: List of function definitions (dicts, Pydantic models, callables, or tools)
    - function_call: Function call strategy
      - "auto": Model chooses whether to call function
      - "none": Disable function calling
      - str: Specific function name to call
      - dict: {"name": "function_name"}
    - **kwargs: Additional binding parameters

    Returns:
    Runnable: Model with bound functions
"""Generate responses conforming to specific schemas using function calling or JSON mode.
Generate responses conforming to specific schemas using function calling or JSON mode.

```python
def with_structured_output(
    self,
    schema: Optional[Union[Dict, Type[BaseModel]]] = None,
    *,
    method: Literal["function_calling", "json_mode"] = "function_calling",
    include_raw: bool = False,
    **kwargs: Any
) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
    """
    Create model that outputs structured data.

    Parameters:
    - schema: Output schema (Pydantic model, TypedDict, or OpenAI function schema)
    - method: Generation method
      - "function_calling": Use function calling API
      - "json_mode": Use JSON mode (requires schema instructions in prompt)
    - include_raw: Include raw response alongside parsed output
    - **kwargs: Additional parameters

    Returns:
    Runnable: Model that returns structured output
    If include_raw=False:
    - Returns: Instance of schema type (if Pydantic) or dict
    If include_raw=True:
    - Returns: Dict with keys 'raw', 'parsed', 'parsing_error'
"""Access model configuration and type information.
Access model configuration and type information.

```python
@property
def _llm_type(self) -> str:
    """
    Return model type identifier for LangChain integration.

    Returns:
    str: Always returns "groq-chat"
    """

@property
def lc_secrets(self) -> Dict[str, str]:
    """
    Return secret field mappings for serialization.

    Returns:
    Dict[str, str]: Mapping of secret fields to environment variables
    {"groq_api_key": "GROQ_API_KEY"}
    """

@classmethod
def is_lc_serializable(cls) -> bool:
    """
    Check if model supports LangChain serialization.

    Returns:
    bool: Always returns True
"""from langchain_groq import ChatGroq
Tool calling with a Pydantic schema:

```python
from langchain_groq import ChatGroq
from pydantic import BaseModel, Field

class WeatherTool(BaseModel):
    """Get weather information for a location."""
    location: str = Field(description="City and state, e.g. 'San Francisco, CA'")

llm = ChatGroq(model="llama-3.1-8b-instant")
llm_with_tools = llm.bind_tools([WeatherTool], tool_choice="auto")
response = llm_with_tools.invoke("What's the weather in New York?")
print(response.tool_calls)
```
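If you run the requested tool yourself, the result can be passed back as a ToolMessage in a follow-up call; a sketch continuing the example above, with a hard-coded weather string standing in for a real lookup:

```python
from langchain_core.messages import HumanMessage, ToolMessage

tool_call = response.tool_calls[0]
weather = "72°F and sunny"  # stand-in for an actual weather lookup
followup = llm_with_tools.invoke(
    [
        HumanMessage(content="What's the weather in New York?"),
        response,
        ToolMessage(content=weather, tool_call_id=tool_call["id"]),
    ]
)
print(followup.content)
```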
Structured output with a Pydantic schema:

```python
from langchain_groq import ChatGroq
from pydantic import BaseModel, Field
from typing import Optional

class PersonInfo(BaseModel):
    """Extract person information from text."""
    name: str = Field(description="Person's full name")
    age: Optional[int] = Field(description="Person's age if mentioned")
    occupation: Optional[str] = Field(description="Person's job or profession")

llm = ChatGroq(model="llama-3.1-8b-instant")
structured_llm = llm.with_structured_output(PersonInfo)
result = structured_llm.invoke("John Smith is a 35-year-old software engineer.")
print(f"Name: {result.name}, Age: {result.age}, Job: {result.occupation}")
```

Reasoning output with a reasoning-capable model:

```python
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage, SystemMessage
# Use reasoning-capable model with parsed reasoning format
llm = ChatGroq(
    model="deepseek-r1-distill-llama-70b",
    reasoning_format="parsed"
)

messages = [
    SystemMessage(content="You are a math tutor. Show your reasoning."),
    HumanMessage(content="If a train travels 120 miles in 2 hours, what's its average speed?")
]

response = llm.invoke(messages)
print("Answer:", response.content)
print("Reasoning:", response.additional_kwargs.get("reasoning_content", "No reasoning available"))
```

Streaming with accumulated usage metadata:

```python
from langchain_groq import ChatGroq
llm = ChatGroq(model="llama-3.1-8b-instant")
messages = [{"role": "user", "content": "Write a short poem about coding."}]
full_response = None
for chunk in llm.stream(messages):
    print(chunk.content, end="", flush=True)
    if full_response is None:
        full_response = chunk
    else:
        full_response += chunk

print("\n\nToken usage:", full_response.usage_metadata)
print("Response metadata:", full_response.response_metadata)
```

ChatGroq responses include comprehensive metadata for monitoring and optimization:

```python
# Response metadata structure
{
"token_usage": {
"completion_tokens": int, # Output tokens used
"prompt_tokens": int, # Input tokens used
"total_tokens": int, # Total tokens used
"completion_time": float, # Time for completion
"prompt_time": float, # Time for prompt processing
"queue_time": Optional[float], # Time spent in queue
"total_time": float # Total processing time
},
"model_name": str, # Model used for generation
"system_fingerprint": str, # System configuration fingerprint
"finish_reason": str, # Completion reason ("stop", "length", etc.)
"service_tier": str, # Service tier used
"reasoning_effort": Optional[str] # Reasoning effort level (if applicable)
}The package handles various error conditions and provides clear error messages:
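For instance, token usage can be read from a response's metadata; a brief sketch whose field names follow the structure above:

```python
from langchain_groq import ChatGroq

llm = ChatGroq(model="llama-3.1-8b-instant")
response = llm.invoke("Say hello in one word.")

usage = response.response_metadata["token_usage"]
print("Total tokens:", usage["total_tokens"])
print("Finish reason:", response.response_metadata["finish_reason"])
```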
The package handles various error conditions and provides clear error messages:

```python
from langchain_groq import ChatGroq
from groq import BadRequestError

try:
    llm = ChatGroq(model="invalid-model")
    response = llm.invoke("Hello")
except BadRequestError as e:
    print(f"API Error: {e}")
except ValueError as e:
    print(f"Configuration Error: {e}")
```

Common validation errors:
- n must be >= 1
- n must be 1 when streaming is enabled
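A sketch of triggering the second validation error, assuming the rules above are enforced at construction time:

```python
from langchain_groq import ChatGroq

try:
    # n > 1 combined with streaming should be rejected.
    llm = ChatGroq(model="llama-3.1-8b-instant", n=2, streaming=True)
except ValueError as e:
    print(f"Configuration Error: {e}")
```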
```python
# Core types used throughout the API
from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, Union
from typing_extensions import TypedDict
from langchain_core.messages import BaseMessage, BaseMessageChunk
from langchain_core.outputs import ChatResult, LLMResult
from langchain_core.language_models import LanguageModelInput
from langchain_core.runnables import Runnable, RunnableConfig
from langchain_core.callbacks import BaseCallbackHandler, BaseCallbackManager
from langchain_core.tools import BaseTool
from pydantic import BaseModel, SecretStr
from collections.abc import AsyncIterator, Iterator, Mapping
# Message types for input
LanguageModelInput = Union[
    str,                # Simple string input
    List[BaseMessage],  # List of messages
    # ... other LangChain input types
]
# Service tier options
ServiceTier = Literal["on_demand", "flex", "auto"]
# Reasoning format options
ReasoningFormat = Literal["parsed", "raw", "hidden"]
# Tool choice options
ToolChoice = Union[
    Dict,  # {"type": "function", "function": {"name": "tool_name"}}
    str,   # Tool name or "auto"/"any"/"none"
    Literal["auto", "any", "none"],
    bool   # True for single tool requirement
]
```