Groq integration for LangChain.js providing chat model capabilities for high-performance language model inference
```bash
npx @tessl/cli install tessl/npm-langchain--groq@0.2.0
```

@langchain/groq provides a LangChain.js integration for Groq's high-performance language model inference API. It implements the ChatGroq class, which extends LangChain's chat model interface to interact with Groq's API and supports models such as llama-3.3-70b-versatile with built-in streaming, tool calling, and structured output.

Install the package alongside @langchain/core:

```bash
npm install @langchain/groq @langchain/core
```

Import with ES modules:

```typescript
import { ChatGroq } from "@langchain/groq";
```

For CommonJS:

```typescript
const { ChatGroq } = require("@langchain/groq");
```

A minimal end-to-end example:

```typescript
import { ChatGroq } from "@langchain/groq";
import { HumanMessage } from "@langchain/core/messages";
// Create a chat model instance
const model = new ChatGroq({
apiKey: process.env.GROQ_API_KEY, // or provide directly
model: "llama-3.3-70b-versatile",
temperature: 0.7,
});
// Basic text generation
const message = new HumanMessage("What color is the sky?");
const response = await model.invoke([message]);
console.log(response.content);
// Streaming responses
for await (const chunk of await model.stream([message])) {
console.log(chunk.content);
}
```

The @langchain/groq package is built around these key components:
Core chat model functionality for generating responses from Groq's language models.

```typescript
class ChatGroq extends BaseChatModel<ChatGroqCallOptions, AIMessageChunk> {
constructor(fields: ChatGroqInput);
invoke(
messages: BaseMessage[],
options?: Partial<ChatGroqCallOptions>
): Promise<BaseMessage>;
stream(
messages: BaseMessage[],
options?: Partial<ChatGroqCallOptions>
): Promise<AsyncIterable<BaseMessageChunk>>;
bindTools(
tools: ChatGroqToolType[],
kwargs?: Partial<ChatGroqCallOptions>
): Runnable<BaseLanguageModelInput, AIMessageChunk, ChatGroqCallOptions>;
withStructuredOutput<RunOutput extends Record<string, any> = Record<string, any>>(
outputSchema: InteropZodType<RunOutput> | Record<string, any>,
config?: StructuredOutputMethodOptions<boolean>
): Runnable<BaseLanguageModelInput, RunOutput> | Runnable<BaseLanguageModelInput, { raw: BaseMessage; parsed: RunOutput }>;
}
```

Usage Examples:

```typescript
import { ChatGroq } from "@langchain/groq";
import { HumanMessage, SystemMessage } from "@langchain/core/messages";
// Initialize with configuration
const chat = new ChatGroq({
model: "llama-3.3-70b-versatile",
temperature: 0.7,
maxTokens: 1000,
apiKey: "your-api-key",
});
// Multi-turn conversation
const messages = [
new SystemMessage("You are a helpful assistant."),
new HumanMessage("Explain quantum computing in simple terms."),
];
const response = await chat.invoke(messages);
// With runtime options
const configuredResponse = await chat.invoke(messages, {
temperature: 0.2,
stop: ["END"],
headers: { "Custom-Header": "value" },
});
```

Bind tools to enable function calling capabilities with Groq models that support it.

```typescript
bindTools(
tools: ChatGroqToolType[],
kwargs?: Partial<ChatGroqCallOptions>
): Runnable<BaseLanguageModelInput, AIMessageChunk, ChatGroqCallOptions>;
type ChatGroqToolType = BindToolsInput | ChatCompletionTool;
```

Usage Examples:

```typescript
import { ChatGroq } from "@langchain/groq";
import { HumanMessage } from "@langchain/core/messages";
import { tool } from "@langchain/core/tools";
import { z } from "zod";
// Define a tool
const weatherTool = tool(
async ({ location }: { location: string }) => {
return `The weather in ${location} is sunny and 72°F`;
},
{
name: "get_weather",
description: "Get the current weather for a location",
schema: z.object({
location: z.string().describe("The city and state, e.g. San Francisco, CA"),
}),
}
);
// Bind tools to model
const modelWithTools = new ChatGroq({
model: "llama3-groq-70b-8192-tool-use-preview",
}).bindTools([weatherTool]);
const response = await modelWithTools.invoke([
new HumanMessage("What's the weather like in New York?")
]);
// Check for tool calls in response
if (response.tool_calls?.length) {
console.log("Tool calls:", response.tool_calls);
}
```
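The `tool_choice` call option (documented under runtime options below) can force the model to call a specific bound tool instead of replying in prose. A short sketch continuing the example above; support for forced tool choice depends on the model:

```typescript
// Continues the previous example: modelWithTools is bound to get_weather.
const forcedResponse = await modelWithTools.invoke(
  [new HumanMessage("What's the weather like in New York?")],
  {
    tool_choice: { type: "function", function: { name: "get_weather" } },
  }
);
console.log(forcedResponse.tool_calls);
```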
Generate structured JSON responses using Zod schemas or JSON schemas.

```typescript
withStructuredOutput<RunOutput extends Record<string, any> = Record<string, any>>(
outputSchema: InteropZodType<RunOutput> | Record<string, any>,
config?: StructuredOutputMethodOptions<boolean>
): Runnable<BaseLanguageModelInput, RunOutput> | Runnable<BaseLanguageModelInput, { raw: BaseMessage; parsed: RunOutput }>;
interface StructuredOutputMethodOptions<IncludeRaw extends boolean = false> {
name?: string;
method?: "functionCalling" | "jsonMode";
includeRaw?: IncludeRaw;
}
```

Usage Examples:

```typescript
import { ChatGroq } from "@langchain/groq";
import { HumanMessage } from "@langchain/core/messages";
import { z } from "zod";
// Define output schema
const PersonSchema = z.object({
name: z.string().describe("Person's full name"),
age: z.number().describe("Person's age in years"),
occupation: z.string().describe("Person's job or profession"),
location: z.string().describe("City and country where person lives"),
});
// Create structured output model
const structuredModel = new ChatGroq({
model: "llama3-groq-70b-8192-tool-use-preview",
}).withStructuredOutput(PersonSchema, { name: "extract_person_info" });
// Generate structured response
const result = await structuredModel.invoke([
new HumanMessage("Tell me about Alice Johnson, a 32-year-old software engineer from Toronto, Canada.")
]);
console.log(result);
// Output: { name: "Alice Johnson", age: 32, occupation: "software engineer", location: "Toronto, Canada" }
// Include raw response
const modelWithRaw = new ChatGroq({
model: "llama3-groq-70b-8192-tool-use-preview",
}).withStructuredOutput(PersonSchema, { includeRaw: true });
const resultWithRaw = await modelWithRaw.invoke([...]);
console.log(resultWithRaw.parsed); // Structured data
console.log(resultWithRaw.raw); // Original message
```

Stream response chunks for real-time applications.

```typescript
stream(
messages: BaseMessage[],
options?: Partial<ChatGroqCallOptions>
): Promise<AsyncIterable<BaseMessageChunk>>;
```

Usage Examples:

```typescript
import { ChatGroq } from "@langchain/groq";
import { HumanMessage, AIMessageChunk } from "@langchain/core/messages";
import { concat } from "@langchain/core/utils/stream";
const chat = new ChatGroq({
model: "llama-3.3-70b-versatile",
streamUsage: true, // Include token usage in stream
});
// Basic streaming
for await (const chunk of await chat.stream([
new HumanMessage("Write a short story about a robot learning to paint.")
])) {
console.log(chunk.content);
}
// Aggregate streaming chunks
const stream = await chat.stream([new HumanMessage("Explain photosynthesis")]);
let full: AIMessageChunk | undefined;
for await (const chunk of stream) {
full = !full ? chunk : concat(full, chunk);
}
console.log(full?.content);
// Stream with options
for await (const chunk of await chat.stream([...], {
temperature: 0.3,
max_tokens: 500,
stream_options: { include_usage: true },
})) {
if (chunk.response_metadata?.usage) {
console.log("Token usage:", chunk.response_metadata.usage);
}
}
```

Constructor configuration interface for ChatGroq.

```typescript
interface ChatGroqInput extends BaseChatModelParams {
/** Required: The Groq model name to use */
model: string;
/** API key (defaults to GROQ_API_KEY environment variable) */
apiKey?: string;
/** Sampling temperature between 0 and 2 (default: 0.7) */
temperature?: number;
/** Maximum number of tokens in response */
maxTokens?: number;
/** Nucleus sampling parameter (0-1) */
topP?: number;
/** Frequency penalty for reducing repetition (-2 to 2) */
frequencyPenalty?: number;
/** Presence penalty for encouraging topic diversity (-2 to 2) */
presencePenalty?: number;
/** Number of completions to generate */
n?: number;
/** Token probability adjustments */
logitBias?: Record<string, number>;
/** End-user identifier for monitoring */
user?: string;
/** Enable streaming responses */
streaming?: boolean;
/** Include token usage data in streamed chunks (default: true) */
streamUsage?: boolean;
/** Return log probabilities of output tokens */
logprobs?: boolean;
/** Number of most likely tokens to return at each position (0-5) */
topLogprobs?: number;
/** Stop sequences (up to 4) */
stop?: string | null | Array<string>;
/** Stop sequences provided as an array */
stopSequences?: Array<string>;
/** Override API base URL */
baseUrl?: string;
/** Request timeout in milliseconds */
timeout?: number;
/** Custom HTTP agent */
httpAgent?: any;
/** Custom fetch function */
fetch?: (...args: any) => any;
/** Default headers for all requests */
defaultHeaders?: Record<string, string>;
/** Default query parameters for all requests */
defaultQuery?: Record<string, string>;
}
```
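Only `model` is required; other fields fall back to defaults or to the GROQ_API_KEY environment variable. A sketch of a more fully configured instance, using fields from the interface above (the timeout value and header name are illustrative, not required by the API):

```typescript
import { ChatGroq } from "@langchain/groq";

const configuredModel = new ChatGroq({
  model: "llama-3.3-70b-versatile",
  apiKey: process.env.GROQ_API_KEY,
  temperature: 0.2,
  maxTokens: 512,
  topP: 0.9,
  stopSequences: ["END"],
  streaming: true,
  streamUsage: true,
  timeout: 30000, // request timeout in milliseconds
  defaultHeaders: { "x-example-header": "docs" }, // illustrative header
});
```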
Runtime options for invoke/stream methods.

```typescript
interface ChatGroqCallOptions extends BaseChatModelCallOptions {
/** Additional headers for this specific request */
headers?: Record<string, string | null | undefined>;
/** Index of the prompt in a list of prompts */
promptIndex?: number;
/** Stream configuration options */
stream_options?: {
/** Whether to include token usage in the stream */
include_usage: boolean;
};
/** Tools for function calling */
tools?: ChatGroqToolType[];
/** Override temperature for this call */
temperature?: number;
/** Override max tokens for this call */
max_tokens?: number;
/** Maximum completion tokens (newer name for max_tokens) */
max_completion_tokens?: number;
/** Override stop sequences for this call */
stop?: string | string[];
/** Tool choice strategy */
tool_choice?: string | {
type: "function";
function: { name: string };
};
/** Response format specification */
response_format?: {
type: "json_object" | "text";
};
/** Seed for deterministic sampling */
seed?: number;
/** Enable parallel tool calls */
parallel_tool_calls?: boolean;
/** Frequency penalty override */
frequency_penalty?: number;
/** Presence penalty override */
presence_penalty?: number;
/** Logit bias override */
logit_bias?: Record<string, number>;
/** Log probabilities override */
logprobs?: boolean;
/** Top log probabilities override */
top_logprobs?: number;
/** Top-p override */
top_p?: number;
/** Reasoning format for compatible models */
reasoning_format?: "text";
/** Service tier specification */
service_tier?: "auto" | "default";
}
```
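Call options are passed as the second argument to invoke or stream and apply to that request only. A sketch combining a few of the options above; JSON mode generally expects the prompt itself to mention JSON, and seed-based determinism is best-effort:

```typescript
import { ChatGroq } from "@langchain/groq";
import { HumanMessage, SystemMessage } from "@langchain/core/messages";

const chat = new ChatGroq({ model: "llama-3.3-70b-versatile" });

const jsonResponse = await chat.invoke(
  [
    new SystemMessage("You respond only with a JSON object."),
    new HumanMessage("List three primary colors under the key 'colors'."),
  ],
  {
    response_format: { type: "json_object" }, // request a JSON object reply
    seed: 42, // best-effort deterministic sampling
    max_tokens: 200,
  }
);
console.log(jsonResponse.content);
```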
Metadata types attached to ChatGroq responses.

```typescript
interface TimingMetadata {
/** Time spent generating tokens (seconds) */
completion_time?: number;
/** Time spent processing input tokens (seconds) */
prompt_time?: number;
/** Time request spent in queue (seconds) */
queue_time?: number;
/** Total time (completion_time + prompt_time) (seconds) */
total_time?: number;
}
interface TokenUsage {
/** Number of tokens in completion */
completionTokens?: number;
/** Number of tokens in prompt */
promptTokens?: number;
/** Total tokens used */
totalTokens?: number;
}
```
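These types describe data attached to responses rather than values you construct yourself. A sketch of where to look for them; the exact keys under response_metadata vary by LangChain version, so treat the property names as illustrative:

```typescript
import { ChatGroq } from "@langchain/groq";
import { HumanMessage } from "@langchain/core/messages";

const chat = new ChatGroq({ model: "llama-3.3-70b-versatile" });
const response = await chat.invoke([new HumanMessage("Hello!")]);

// Standardized LangChain token counts (input/output/total).
console.log(response.usage_metadata);

// Provider-specific metadata; Groq token usage and timing fields
// (completion_time, queue_time, etc.) typically appear here.
console.log(response.response_metadata);
```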
Helper functions for converting between LangChain and Groq message formats.

```typescript
/**
* Convert LangChain message type to Groq role string
*/
function messageToGroqRole(message: BaseMessage): GroqRoleEnum;
type GroqRoleEnum = "system" | "assistant" | "user" | "function";
```

The ChatGroq class throws standard JavaScript errors for conditions such as missing or invalid API keys, rate limiting, and network failures.

Common error handling pattern:

```typescript
try {
const response = await chat.invoke([message]);
console.log(response.content);
} catch (error: any) {
if (error.message.includes("API key")) {
console.error("Authentication failed - check your API key");
} else if (error.message.includes("rate limit")) {
console.error("Rate limit exceeded - implement retry logic");
} else {
console.error("Unexpected error:", error.message);
}
}
```

Set your Groq API key via the GROQ_API_KEY environment variable:

```bash
export GROQ_API_KEY="your-groq-api-key-here"
```
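For the rate-limit branch in the pattern above, retries can be layered on top of invoke. The helper below is a minimal illustrative sketch, not part of the @langchain/groq API; LangChain chat models also typically accept a maxRetries constructor option that handles transient failures automatically:

```typescript
import { ChatGroq } from "@langchain/groq";
import { BaseMessage, HumanMessage } from "@langchain/core/messages";

// Hypothetical helper: retry on rate-limit errors with exponential backoff.
async function invokeWithRetry(
  chat: ChatGroq,
  messages: BaseMessage[],
  maxAttempts = 3
) {
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return await chat.invoke(messages);
    } catch (error: any) {
      const isRateLimit =
        typeof error?.message === "string" && error.message.includes("rate limit");
      if (!isRateLimit || attempt === maxAttempts) throw error;
      // Back off roughly 1s, 2s, 4s, ... between attempts.
      await new Promise((resolve) => setTimeout(resolve, 2 ** attempt * 500));
    }
  }
  throw new Error("unreachable");
}

const chat = new ChatGroq({ model: "llama-3.3-70b-versatile" });
const reply = await invokeWithRetry(chat, [new HumanMessage("Hello!")]);
console.log(reply.content);
```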