Models API Reference

Complete API reference for universal model initialization and configuration.

initChatModel

/**
 * Initialize a chat model from string identifier or instance
 * @param model - Model string identifier (e.g., "openai:gpt-4o") or model instance
 * @param fields - Optional configuration fields
 * @returns Configured chat model instance
 */
function initChatModel<
  RunInput = any,
  CallOptions extends BaseChatModelCallOptions = BaseChatModelCallOptions
>(
  model?: string | ChatModel<RunInput, CallOptions>,
  fields?: InitChatModelFields
): ChatModel<RunInput, CallOptions>;

interface InitChatModelFields {
  /**
   * Temperature for sampling (0.0 to 2.0, provider-dependent)
   */
  temperature?: number;

  /**
   * Model name override
   */
  modelName?: string;

  /**
   * Request timeout in milliseconds
   */
  timeout?: number;

  /**
   * Maximum tokens to generate
   */
  maxTokens?: number;

  /**
   * Enable streaming
   */
  streaming?: boolean;

  /**
   * Top-p sampling parameter
   */
  topP?: number;

  /**
   * Frequency penalty
   */
  frequencyPenalty?: number;

  /**
   * Presence penalty
   */
  presencePenalty?: number;

  /**
   * Stop sequences
   */
  stop?: string[];

  /**
   * Provider-specific configuration
   */
  [key: string]: any;
}
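
A minimal usage sketch based on the declarations above; the message-array input shape is an assumption here, since RunInput defaults to any:

import { initChatModel } from "langchain";

// String identifier uses the "provider:model" format (see Supported Providers below)
const model = initChatModel("openai:gpt-4o", {
  temperature: 0.2, // lean deterministic
  maxTokens: 1024,  // cap response length
  timeout: 30000,   // 30-second request timeout
});

const reply = await model.invoke([
  { role: "user", content: "Summarize Hamlet in one sentence." },
]);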

Supported Providers

type ChatModelProvider =
  | "openai"
  | "azure_openai"
  | "anthropic"
  | "google-vertexai"
  | "google-vertexai-web"
  | "google-genai"
  | "cohere"
  | "mistralai"
  | "mistral"
  | "bedrock"
  | "ollama"
  | "groq"
  | "cerebras"
  | "deepseek"
  | "xai"
  | "fireworks"
  | "together"
  | "perplexity";

ConfigurableModel Class

/**
 * Configurable model wrapper that extends ChatModel
 * Allows runtime model configuration and swapping
 */
class ConfigurableModel<
  RunInput = any,
  CallOptions extends BaseChatModelCallOptions = BaseChatModelCallOptions
> extends ChatModel<RunInput, CallOptions> {
  constructor(fields: ConfigurableModelFields);

  /**
   * Bind tools to the model
   */
  bindTools(
    tools: Tool[],
    options?: BindToolsOptions
  ): ConfigurableModel<RunInput, CallOptions>;

  /**
   * Add structured output support
   */
  withStructuredOutput<T>(
    schema: ZodType<T> | Record<string, any>,
    options?: StructuredOutputOptions
  ): Runnable<RunInput, T>;

  /**
   * Bind configuration
   */
  withConfig(config: RunnableConfig): ConfigurableModel<RunInput, CallOptions>;

  /**
   * Invoke the model
   */
  invoke(input: RunInput, options?: CallOptions): Promise<AIMessage>;

  /**
   * Stream model responses
   */
  stream(
    input: RunInput,
    options?: CallOptions
  ): AsyncGenerator<AIMessageChunk>;

  /**
   * Batch invoke
   */
  batch(inputs: RunInput[], options?: CallOptions): Promise<AIMessage[]>;

  /**
   * Transform generator
   */
  transform(
    generator: AsyncGenerator<RunInput>,
    options?: CallOptions
  ): AsyncGenerator<AIMessageChunk>;

  /**
   * Stream with logging
   */
  streamLog(
    input: RunInput,
    options?: CallOptions
  ): AsyncGenerator<RunStreamLog>;

  /**
   * Stream events
   */
  streamEvents(
    input: RunInput,
    options?: CallOptions
  ): AsyncGenerator<StreamEvent>;

  /**
   * Get model profile information
   */
  get profile(): ModelProfile;
}

interface ConfigurableModelFields {
  /**
   * Model identifier or instance
   */
  model?: string | ChatModel;

  /**
   * Sampling temperature
   */
  temperature?: number;

  /**
   * Model name override
   */
  modelName?: string;

  /**
   * Request timeout
   */
  timeout?: number;

  /**
   * Maximum tokens to generate
   */
  maxTokens?: number;

  /**
   * Enable streaming
   */
  streaming?: boolean;

  /**
   * Top-p sampling
   */
  topP?: number;

  /**
   * Frequency penalty
   */
  frequencyPenalty?: number;

  /**
   * Presence penalty
   */
  presencePenalty?: number;

  /**
   * Stop sequences
   */
  stop?: string[];

  /**
   * Provider-specific fields
   */
  [key: string]: any;
}

interface ConfigurableChatModelCallOptions extends BaseChatModelCallOptions {
  /**
   * Override model at call time
   */
  model?: string;
  [key: string]: any;
}
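
A short sketch of the wrapper in use, assuming initChatModel returns a ConfigurableModel (consistent with the class above); the call-time model override follows ConfigurableChatModelCallOptions:

import { initChatModel } from "langchain";

const model = initChatModel("openai:gpt-4o");

// Rebind run configuration without recreating the model
const tagged = model.withConfig({ tags: ["production"] });

// Override the model for a single call (ConfigurableChatModelCallOptions.model)
const cheap = await model.invoke("Say hi", { model: "openai:gpt-4o-mini" });

// Streaming yields AIMessageChunk values as they arrive
for await (const chunk of model.stream("Tell me a short story")) {
  process.stdout.write(String(chunk.content));
}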

Provider Configuration

/**
 * Provider configuration map
 */
const MODEL_PROVIDER_CONFIG: Record<
  ChatModelProvider,
  {
    packageName: string;
    className: string;
    defaultModelName?: string;
  }
>;
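
An illustrative entry showing the declared value shape only; the defaultModelName below is a hypothetical placeholder, not the map's actual contents:

// Hypothetical "openai" entry, matching the declared shape
const exampleEntry = {
  packageName: "@langchain/openai", // integration package to load
  className: "ChatOpenAI",          // resolved via getChatModelByClassName
  defaultModelName: "gpt-4o-mini",  // placeholder default
};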

Helper Functions

/**
 * Get model class by class name
 * @param className - Model class name (e.g., "ChatOpenAI")
 * @returns Model class constructor
 */
function getChatModelByClassName(
  className: string
): typeof ChatModel | undefined;

/**
 * Infer provider from model name
 * @param modelName - Model name string
 * @returns Inferred provider name or undefined
 * @internal
 */
function _inferModelProvider(modelName: string): ChatModelProvider | undefined;
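
A hedged sketch of the helpers together; _inferModelProvider is internal and shown for illustration only, and the inferred value is an expectation, not a guarantee:

import { getChatModelByClassName } from "langchain";

// Map a bare model name to a provider (internal helper, not a public export)
const provider = _inferModelProvider("gpt-4o"); // expected: "openai"

// Resolve a provider's chat model class by name
const ChatOpenAI = getChatModelByClassName("ChatOpenAI"); // class or undefined
if (!ChatOpenAI) {
  throw new Error("ChatOpenAI is not available; install the provider package");
}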

Provider-Specific Details

OpenAI

Model Identifiers:

  • openai:gpt-4o - Most capable GPT-4-class model
  • openai:gpt-4o-mini - Smaller, faster, cheaper GPT-4o variant
  • openai:gpt-4-turbo - GPT-4 Turbo (previous generation)
  • openai:gpt-3.5-turbo - Legacy GPT-3.5

Specific Configuration:

interface OpenAIFields extends InitChatModelFields {
  /**
   * Organization ID for OpenAI API
   */
  organizationId?: string;

  /**
   * OpenAI API key (or use OPENAI_API_KEY env var)
   */
  openAIApiKey?: string;

  /**
   * Custom base URL for OpenAI-compatible APIs
   */
  configuration?: {
    baseURL?: string;
  };

  /**
   * Maximum completion tokens
   */
  maxTokens?: number;

  /**
   * Temperature (0-2, default 1)
   */
  temperature?: number;

  /**
   * Top P sampling (0-1)
   */
  topP?: number;

  /**
   * Frequency penalty (-2 to 2)
   */
  frequencyPenalty?: number;

  /**
   * Presence penalty (-2 to 2)
   */
  presencePenalty?: number;

  /**
   * Response format for JSON mode
   */
  responseFormat?: { type: "json_object" | "text" };

  /**
   * Seed for deterministic sampling
   */
  seed?: number;
}

Features:

  • Native tool calling support
  • Native structured output (JSON mode)
  • Vision (image understanding)
  • Function calling
  • Streaming support
  • Logprobs available
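
A sketch exercising the OpenAI-specific fields above; note that OpenAI's JSON mode expects the prompt itself to mention JSON:

import { initChatModel } from "langchain";

const model = initChatModel("openai:gpt-4o", {
  temperature: 0,                          // favor determinism
  seed: 42,                                // best-effort reproducible sampling
  responseFormat: { type: "json_object" }, // native JSON mode
});

const reply = await model.invoke([
  { role: "user", content: 'Return {"status": "ok"} as JSON.' },
]);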

Anthropic

Model Identifiers:

  • anthropic:claude-3-5-sonnet-20241022 - Latest Sonnet (most capable)
  • anthropic:claude-3-5-haiku-20241022 - Latest Haiku (fastest)
  • anthropic:claude-3-opus-20240229 - Claude 3 Opus (legacy)

Specific Configuration:

interface AnthropicFields extends InitChatModelFields {
  /**
   * Anthropic API key (or use ANTHROPIC_API_KEY env var)
   */
  anthropicApiKey?: string;

  /**
   * Maximum tokens to generate
   */
  maxTokens?: number;

  /**
   * Temperature (0-1, default 1)
   */
  temperature?: number;

  /**
   * Top P sampling (0-1)
   */
  topP?: number;

  /**
   * Top K sampling
   */
  topK?: number;

  /**
   * Custom stop sequences
   */
  stopSequences?: string[];
}

Features:

  • Native tool calling support
  • Extended context windows (up to 200K tokens)
  • Vision (image understanding)
  • Prompt caching (cost optimization)
  • Streaming support
  • No native JSON mode (use the tool strategy; see the sketch below)
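
Because there is no native JSON mode, structured output for Anthropic goes through tool calling; a sketch assuming withStructuredOutput (declared on ConfigurableModel above) applies that tool strategy, with zod available for the schema:

import { initChatModel } from "langchain";
import { z } from "zod";

const model = initChatModel("anthropic:claude-3-5-sonnet-20241022", {
  maxTokens: 1024,
  topK: 40,                      // Anthropic-specific sampling knob
  stopSequences: ["\n\nHuman:"], // custom stop sequences
});

// Structured output is implemented via a tool call under the hood
const classify = model.withStructuredOutput(
  z.object({
    sentiment: z.enum(["positive", "negative"]),
    confidence: z.number(),
  })
);
const result = await classify.invoke("I loved this movie!");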

Google (Vertex AI / Generative AI)

Model Identifiers:

  • google-genai:gemini-1.5-pro - Most capable Gemini model
  • google-genai:gemini-1.5-flash - Faster Gemini variant
  • google-genai:gemini-1.0-pro - Legacy Gemini 1.0

For Vertex AI deployments, use the google-vertexai: (or google-vertexai-web:) prefix with the same model names; a bare google: prefix is not in the supported provider list.

Specific Configuration:

interface GoogleFields extends InitChatModelFields {
  /**
   * Google API key (for Generative AI)
   */
  apiKey?: string;

  /**
   * Project ID (for Vertex AI)
   */
  projectId?: string;

  /**
   * Location (for Vertex AI)
   */
  location?: string;

  /**
   * Maximum output tokens
   */
  maxOutputTokens?: number;

  /**
   * Temperature (0-2)
   */
  temperature?: number;

  /**
   * Top P sampling
   */
  topP?: number;

  /**
   * Top K sampling
   */
  topK?: number;

  /**
   * Safety settings
   */
  safetySettings?: Array<{
    category: string;
    threshold: string;
  }>;
}

Features:

  • Native tool calling (function calling)
  • Large context windows (up to 1M tokens)
  • Vision and multimodal understanding
  • Safety filters
  • Streaming support
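
A sketch using the fields above with the Generative AI flavor (API key); the safetySettings strings are the usual Gemini enum values and should be treated as assumptions here:

import { initChatModel } from "langchain";

// Generative AI flavor; Vertex AI would use projectId/location instead of apiKey
const model = initChatModel("google-genai:gemini-1.5-pro", {
  apiKey: process.env.GOOGLE_API_KEY,
  maxOutputTokens: 2048,
  safetySettings: [
    {
      category: "HARM_CATEGORY_DANGEROUS_CONTENT", // assumed Gemini enum
      threshold: "BLOCK_ONLY_HIGH",                // assumed Gemini enum
    },
  ],
});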

Local Models (Ollama)

Model Identifiers:

  • ollama:llama3.1 - Meta's Llama 3.1
  • ollama:mistral - Mistral AI models
  • ollama:phi3 - Microsoft's Phi-3
  • ollama:codellama - Code-specialized Llama

Specific Configuration:

interface OllamaFields extends InitChatModelFields {
  /**
   * Base URL for Ollama server (default: http://localhost:11434)
   */
  baseUrl?: string;

  /**
   * Model name in Ollama
   */
  model?: string;

  /**
   * Temperature (0-1)
   */
  temperature?: number;

  /**
   * Number of tokens to predict
   */
  numPredict?: number;

  /**
   * Top K sampling
   */
  topK?: number;

  /**
   * Top P sampling
   */
  topP?: number;

  /**
   * Repeat penalty
   */
  repeatPenalty?: number;
}

Features:

  • Fully local execution (no API key required)
  • Free to use
  • Multiple model options
  • Tool calling support (limited)
  • No native structured output
  • Streaming support
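
A sketch for a local server, assuming the default endpoint and a model already pulled with ollama pull llama3.1:

import { initChatModel } from "langchain";

const model = initChatModel("ollama:llama3.1", {
  baseUrl: "http://localhost:11434", // default Ollama endpoint
  temperature: 0.7,
  numPredict: 512,    // cap generated tokens
  repeatPenalty: 1.1, // discourage repetition
});

// No API key required; inference runs entirely on the local machine
const reply = await model.invoke([
  { role: "user", content: "Explain recursion in one paragraph." },
]);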

Advanced Model Patterns

Model Selection Based on Task

import { initChatModel, createAgent } from "langchain";

function selectModelForTask(taskType: string) {
  switch (taskType) {
    case "simple":
      return initChatModel("openai:gpt-4o-mini", {
        temperature: 0.3,
        maxTokens: 500,
      });
    case "complex":
      return initChatModel("openai:gpt-4o", {
        temperature: 0.7,
        maxTokens: 4000,
      });
    case "creative":
      return initChatModel("anthropic:claude-3-5-sonnet-20241022", {
        temperature: 1.0,
        maxTokens: 4000,
      });
    case "local":
      return initChatModel("ollama:llama3.1");
    default:
      return initChatModel("openai:gpt-4o");
  }
}

const agent = createAgent({
  model: selectModelForTask("complex"),
  tools: [],
});

Dynamic Model Configuration

import { initChatModel, createAgent } from "langchain";

const agent = createAgent({
  model: (state) => {
    // Select model based on state
    const messageCount = state.messages.length;

    if (messageCount > 50) {
      // Use cheaper model for long conversations
      return initChatModel("openai:gpt-4o-mini");
    }

    // Use powerful model for short conversations
    return initChatModel("openai:gpt-4o");
  },
  tools: [],
});

Model with Custom Base URL

import { initChatModel } from "langchain";

// Use OpenAI-compatible API (e.g., Azure, LocalAI, etc.)
const model = initChatModel("openai:gpt-4", {
  configuration: {
    baseURL: "https://your-custom-endpoint.com/v1",
  },
  openAIApiKey: "your-api-key",
});

Model Response Caching

import { initChatModel, createAgent, anthropicPromptCachingMiddleware } from "langchain";

// For Anthropic models, use prompt caching middleware
const agent = createAgent({
  model: "anthropic:claude-3-5-sonnet-20241022",
  tools: [],
  systemPrompt: "You are a helpful assistant with extensive knowledge.",
  middleware: [
    anthropicPromptCachingMiddleware({
      cacheSystemPrompt: true, // Cache the system prompt
      cacheMessages: true, // Cache conversation history
    }),
  ],
});

// Significant cost savings on repeated calls with same context

Multi-Model Agent

import { initChatModel, createAgent } from "langchain";

// Create multiple models for different purposes
const fastModel = initChatModel("openai:gpt-4o-mini");
const powerfulModel = initChatModel("openai:gpt-4o");
const localModel = initChatModel("ollama:llama3.1");

// Use different models in different contexts
async function processTask(task: string, complexity: "low" | "high", online: boolean) {
  let model;

  if (!online) {
    model = localModel;
  } else if (complexity === "low") {
    model = fastModel;
  } else {
    model = powerfulModel;
  }

  const agent = createAgent({ model, tools: [] });
  return await agent.invoke({ messages: [{ role: "user", content: task }] });
}

Performance Optimization

Temperature Guidelines

Low Temperature (0 - 0.3):

  • More deterministic and focused responses
  • Good for: factual answers, code generation, structured data extraction
  • Less creative, more consistent

Medium Temperature (0.4 - 0.7):

  • Balanced between creativity and consistency
  • Good for: general conversation, problem-solving, analysis
  • Default for most use cases

High Temperature (0.8 - 2.0):

  • More random and creative responses
  • Good for: creative writing, brainstorming, diverse perspectives
  • Less predictable, more varied

Token Optimization

Max Tokens:

  • Set appropriate maxTokens to control response length
  • Lower values for concise responses (save cost)
  • Higher values for detailed analysis
  • Consider model's context window limits

Context Window Management:

  • Trim message history for long conversations (see the sketch after this list)
  • Use summarization for context compression
  • Remove redundant messages
  • Consider models with larger context windows for complex tasks
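
A minimal, hedged sketch of manual history trimming (keep system messages plus the most recent turns); accurate token counting would need a provider tokenizer and is out of scope here:

type Message = { role: "system" | "user" | "assistant"; content: string };

// Keep system messages and only the last `keep` conversational messages
function trimHistory(messages: Message[], keep = 20): Message[] {
  const system = messages.filter((m) => m.role === "system");
  const rest = messages.filter((m) => m.role !== "system");
  return [...system, ...rest.slice(-keep)];
}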

Model Selection Cost vs Quality

Cost Tiers (Approximate):

  1. Free/Cheap: Ollama (local), GPT-4o-mini, Claude Haiku
  2. Medium: GPT-4o, Gemini 1.5 Pro
  3. Expensive: Claude Opus, GPT-4 Turbo

Quality Considerations:

  • Cheaper models often sufficient for simple tasks
  • Use expensive models only when necessary
  • Batch simple requests to cheaper models
  • Cache results when possible

Error Handling

Model-Specific Errors

Rate Limiting:

import { initChatModel, createAgent, modelRetryMiddleware } from "langchain";

const agent = createAgent({
  model: "openai:gpt-4o",
  tools: [],
  middleware: [
    modelRetryMiddleware({
      maxRetries: 3,
      initialDelay: 1000, // Start with 1s delay
      maxDelay: 10000, // Max 10s delay
      backoffFactor: 2, // Exponential backoff
    }),
  ],
});

Model Fallback:

import { initChatModel, createAgent, modelFallbackMiddleware } from "langchain";

const agent = createAgent({
  model: "openai:gpt-4o",
  tools: [],
  middleware: [
    modelFallbackMiddleware({
      fallbackModels: [
        "openai:gpt-4o-mini", // Try cheaper model
        "anthropic:claude-3-5-haiku-20241022", // Try different provider
        "ollama:llama3.1", // Try local model
      ],
    }),
  ],
});

Authentication Errors

Environment Variables:

# OpenAI
export OPENAI_API_KEY="sk-..."

# Anthropic
export ANTHROPIC_API_KEY="sk-ant-..."

# Google
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/credentials.json"
export GOOGLE_API_KEY="..." # For Generative AI

# Cohere
export COHERE_API_KEY="..."

# Groq
export GROQ_API_KEY="gsk_..."

Programmatic Configuration:

import { initChatModel } from "langchain";

const model = initChatModel("openai:gpt-4o", {
  openAIApiKey: process.env.OPENAI_API_KEY,
  timeout: 30000, // 30 second timeout
  maxRetries: 3, // Retry failed requests
});

Model Comparison

Provider             Best For            Context Window    Tool Support  Vision  Cost
OpenAI GPT-4o        General purpose     128K tokens       Native        Yes     $$$
OpenAI GPT-4o-mini   Simple tasks        128K tokens       Native        Yes     $
Claude 3.5 Sonnet    Complex reasoning   200K tokens       Native        Yes     $$$
Claude 3.5 Haiku     Fast responses      200K tokens       Native        Yes     $
Gemini 1.5 Pro       Large context       1M tokens         Native        Yes     $$
Gemini 1.5 Flash     Speed + context     1M tokens         Native        Yes     $
Ollama (Local)       Privacy, offline    Model-dependent   Limited       Some    Free

Related Documentation

  • Model Guide - Complete usage guide
  • Agent API Reference - Using models with agents
  • Quick Reference - Quick model patterns
  • Performance Optimization - Optimization strategies
