Calculate expected costs for using different LLM models in conversational AI agents based on prompt length, knowledge base size, and RAG configuration.
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
const client = new ElevenLabsClient({ apiKey: "your-api-key" });
// Access this API via: client.conversationalAi.llmUsage.calculate
// Calculates the expected cost per minute for different LLM models based on agent configuration.
/**
 * Returns a list of LLM models and the expected cost per minute for using
 * each of them with the supplied agent configuration (prompt length,
 * knowledge base page count, and RAG setting).
 * @param request - Calculator parameters
 * @param requestOptions - Optional request configuration (timeouts, retries, headers)
 * @returns LLM pricing information for available models
 * @throws UnprocessableEntityError if request validation fails
 */
client.conversationalAi.llmUsage.calculate(
request: LlmUsageCalculatorPublicRequestModel,
requestOptions?: RequestOptions
): HttpResponsePromise<LlmUsageCalculatorResponseModel>;
/** Request body for the LLM usage calculator endpoint. */
interface LlmUsageCalculatorPublicRequestModel {
/** Length of the prompt in characters */
promptLength: number;
/** Number of pages of content in PDF documents or URLs in the agent's knowledge base */
numberOfPages: number;
/** Whether RAG (Retrieval Augmented Generation) is enabled */
ragEnabled: boolean;
}
/** Response body: one pricing entry per available LLM model. */
interface LlmUsageCalculatorResponseModel {
/** Array of LLM models with their calculated pricing */
llmPrices: LlmUsageCalculatorLlmResponseModel[];
}
/** Pricing entry for a single LLM model. */
interface LlmUsageCalculatorLlmResponseModel {
  /** LLM model identifier */
  llm: Llm;
  /** Calculated price per minute for this configuration */
  pricePerMinute: number;
}

/** Supported LLM model identifiers, grouped by provider. */
const Llm = {
  // OpenAI GPT Models
  Gpt4OMini: "gpt-4o-mini",
  Gpt4O: "gpt-4o",
  Gpt4: "gpt-4",
  Gpt4Turbo: "gpt-4-turbo",
  Gpt41: "gpt-4.1",
  Gpt41Mini: "gpt-4.1-mini",
  Gpt41Nano: "gpt-4.1-nano",
  Gpt5: "gpt-5",
  Gpt51: "gpt-5.1",
  Gpt52: "gpt-5.2",
  Gpt52ChatLatest: "gpt-5.2-chat-latest",
  Gpt5Mini: "gpt-5-mini",
  Gpt5Nano: "gpt-5-nano",
  Gpt35Turbo: "gpt-3.5-turbo",
  // Google Gemini Models
  Gemini15Pro: "gemini-1.5-pro",
  Gemini15Flash: "gemini-1.5-flash",
  Gemini20Flash: "gemini-2.0-flash",
  Gemini20FlashLite: "gemini-2.0-flash-lite",
  Gemini25FlashLite: "gemini-2.5-flash-lite",
  Gemini25Flash: "gemini-2.5-flash",
  Gemini3ProPreview: "gemini-3-pro-preview",
  Gemini3FlashPreview: "gemini-3-flash-preview",
  // Anthropic Claude Models
  ClaudeSonnet45: "claude-sonnet-4-5",
  ClaudeSonnet4: "claude-sonnet-4",
  ClaudeHaiku45: "claude-haiku-4-5",
  Claude37Sonnet: "claude-3-7-sonnet",
  Claude35Sonnet: "claude-3-5-sonnet",
  Claude35SonnetV1: "claude-3-5-sonnet-v1",
  Claude3Haiku: "claude-3-haiku",
  // Other Models
  GrokBeta: "grok-beta",
  CustomLlm: "custom-llm",
  Qwen34B: "qwen3-4b",
  Qwen330Ba3B: "qwen3-30b-a3b",
  GptOss20B: "gpt-oss-20b",
  GptOss120B: "gpt-oss-120b",
  Glm45AirFp8: "glm-45-air-fp8",
  WattTool8B: "watt-tool-8b",
  WattTool70B: "watt-tool-70b",
  // ... and many versioned variants
} as const;

/** Union of all supported model identifier strings, derived from the const map. */
type Llm = (typeof Llm)[keyof typeof Llm];

// NOTE: the duplicated `import { ElevenLabsClient }` and second
// `const client = ...` declaration were removed here — `client` is already
// declared at the top of the file, and redeclaring a `const` is a compile
// error (TS2451); the examples below reuse the existing `client`.
// Calculate costs for a simple agent without RAG (no knowledge base pages).
const basicCosts = await client.conversationalAi.llmUsage.calculate({
  promptLength: 500,
  numberOfPages: 0,
  ragEnabled: false,
});

// Print one line per model with its per-minute price.
console.log("LLM pricing comparison:");
for (const { llm, pricePerMinute } of basicCosts.llmPrices) {
  console.log(`${llm}: $${pricePerMinute.toFixed(4)}/min`);
}
// Calculate costs for agent with knowledge base and RAG
const ragCosts = await client.conversationalAi.llmUsage.calculate({
  promptLength: 1000,
  numberOfPages: 50, // 50 pages in knowledge base
  ragEnabled: true,
});

// Find the most cost-effective model.
// Guard against an empty price list: Array.prototype.reduce with no initial
// value throws a TypeError when called on an empty array.
if (ragCosts.llmPrices.length > 0) {
  const cheapest = ragCosts.llmPrices.reduce((min, curr) =>
    curr.pricePerMinute < min.pricePerMinute ? curr : min
  );
  console.log(`Most cost-effective: ${cheapest.llm} at $${cheapest.pricePerMinute.toFixed(4)}/min`);
}
// Compare costs with and without RAG
// Issue both calculations concurrently — only the ragEnabled flag differs.
const baseRequest = { promptLength: 800, numberOfPages: 30 };
const [withoutRag, withRag] = await Promise.all([
  client.conversationalAi.llmUsage.calculate({ ...baseRequest, ragEnabled: false }),
  client.conversationalAi.llmUsage.calculate({ ...baseRequest, ragEnabled: true }),
]);

console.log("Cost comparison for gpt-4o:");
// Helper that pulls the gpt-4o entry out of a price list, if present.
const pickGpt4o = (prices: typeof withRag.llmPrices) =>
  prices.find((entry) => entry.llm === "gpt-4o");
const gpt4oWithout = pickGpt4o(withoutRag.llmPrices);
const gpt4oWith = pickGpt4o(withRag.llmPrices);
if (gpt4oWithout !== undefined && gpt4oWith !== undefined) {
  const increase = gpt4oWith.pricePerMinute - gpt4oWithout.pricePerMinute;
  console.log(`Without RAG: $${gpt4oWithout.pricePerMinute.toFixed(4)}/min`);
  console.log(`With RAG: $${gpt4oWith.pricePerMinute.toFixed(4)}/min`);
  console.log(`Cost increase: $${increase.toFixed(4)}/min`);
}
// Find models under a budget threshold
const budgetThreshold = 0.05; // $0.05 per minute
const affordableModels = await client.conversationalAi.llmUsage.calculate({
  promptLength: 600,
  numberOfPages: 20,
  ragEnabled: true,
});

// Print only the models whose per-minute price fits the budget.
console.log(`Models under $${budgetThreshold}/min:`);
for (const { llm, pricePerMinute } of affordableModels.llmPrices) {
  if (pricePerMinute <= budgetThreshold) {
    console.log(`- ${llm}: $${pricePerMinute.toFixed(4)}/min`);
  }
}
// Calculate how knowledge base size affects costs
/**
 * Shows how knowledge base size affects the gpt-4o per-minute price for a
 * fixed prompt length.
 *
 * The per-size API calls are independent, so they are issued concurrently
 * with Promise.all instead of being awaited sequentially inside the loop
 * (results are still printed in ascending page-count order).
 *
 * @param promptLength - Prompt size in characters to price against
 */
async function analyzeCostByKBSize(promptLength: number): Promise<void> {
  const kbSizes = [0, 10, 50, 100, 200];
  console.log(`Cost analysis for ${promptLength} char prompts:\n`);
  const results = await Promise.all(
    kbSizes.map((pages) =>
      client.conversationalAi.llmUsage.calculate({
        promptLength,
        numberOfPages: pages,
        // RAG only makes sense when there is a non-empty knowledge base.
        ragEnabled: pages > 0,
      })
    )
  );
  results.forEach((costs, i) => {
    const gpt4o = costs.llmPrices.find((p) => p.llm === "gpt-4o");
    if (gpt4o) {
      console.log(`${kbSizes[i]} pages: $${gpt4o.pricePerMinute.toFixed(4)}/min`);
    }
  });
}
await analyzeCostByKBSize(1000);
// Estimate monthly costs based on expected usage
async function estimateMonthlyCost(
avgCallDurationMinutes: number,
callsPerDay: number,
promptLength: number,
kbPages: number
): Promise<void> {
const costs = await client.conversationalAi.llmUsage.calculate({
promptLength,
numberOfPages: kbPages,
ragEnabled: kbPages > 0,
});
const gpt4o = costs.llmPrices.find(p => p.llm === "gpt-4o");
if (gpt4o) {
const costPerCall = gpt4o.pricePerMinute * avgCallDurationMinutes;
const dailyCost = costPerCall * callsPerDay;
const monthlyCost = dailyCost * 30;
console.log("Monthly cost estimate (gpt-4o):");
console.log(`- Cost per minute: $${gpt4o.pricePerMinute.toFixed(4)}`);
console.log(`- Cost per call: $${costPerCall.toFixed(2)}`);
console.log(`- Daily cost: $${dailyCost.toFixed(2)}`);
console.log(`- Monthly cost: $${monthlyCost.toFixed(2)}`);
}
}
await estimateMonthlyCost(5, 100, 800, 50);

Calculate expected costs before deploying an agent to production. Compare different LLM models and configurations to find the optimal balance between performance and cost.
Identify the most cost-effective LLM for your specific use case by comparing pricing across models with your actual prompt length and knowledge base size.
Evaluate whether enabling RAG for your agent is cost-effective based on the additional per-minute costs versus the value of enhanced responses.
Determine optimal knowledge base size by analyzing how the number of pages affects per-minute costs across different LLM models.