Models API Reference

Complete API reference for universal model initialization and configuration.

initChatModel

/**
 * Initialize a chat model from string identifier or instance
 * @param model - Model string identifier (e.g., "openai:gpt-4o") or model instance
 * @param fields - Optional configuration fields
 * @returns Configured chat model instance
 */
function initChatModel<
  RunInput = any,
  CallOptions extends BaseChatModelCallOptions = BaseChatModelCallOptions
>(
  model?: string | ChatModel<RunInput, CallOptions>,
  fields?: InitChatModelFields
): ChatModel<RunInput, CallOptions>;

interface InitChatModelFields {
  /**
   * Temperature for sampling (0.0 to 2.0, provider-dependent)
   */
  temperature?: number;

  /**
   * Model name override
   */
  modelName?: string;

  /**
   * Request timeout in milliseconds
   */
  timeout?: number;

  /**
   * Maximum tokens to generate
   */
  maxTokens?: number;

  /**
   * Enable streaming
   */
  streaming?: boolean;

  /**
   * Top-p sampling parameter
   */
  topP?: number;

  /**
   * Frequency penalty
   */
  frequencyPenalty?: number;

  /**
   * Presence penalty
   */
  presencePenalty?: number;

  /**
   * Stop sequences
   */
  stop?: string[];

  /**
   * Provider-specific configuration
   */
  [key: string]: any;
}
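
For example, a minimal call that combines a provider-prefixed identifier with a few of the fields above (the model name and values are illustrative; this sketch assumes the same langchain entry point used in the examples later in this document):

import { initChatModel } from "langchain";

// Initialize a Claude model with a low temperature and a hard token cap
const model = initChatModel("anthropic:claude-3-5-sonnet-20241022", {
  temperature: 0.2,
  maxTokens: 1024,
  timeout: 30000, // 30-second request timeout
});

// The returned model can be invoked directly with chat messages
const reply = await model.invoke([
  { role: "user", content: "Explain top-p sampling in one sentence." },
]);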

Supported Providers

type ChatModelProvider =
  | "openai"
  | "azure_openai"
  | "anthropic"
  | "google-vertexai"
  | "google-vertexai-web"
  | "google-genai"
  | "cohere"
  | "mistralai"
  | "mistral"
  | "bedrock"
  | "ollama"
  | "groq"
  | "cerebras"
  | "deepseek"
  | "xai"
  | "fireworks"
  | "together"
  | "perplexity";

ConfigurableModel Class

/**
 * Configurable model wrapper that extends ChatModel
 * Allows runtime model configuration and swapping
 */
class ConfigurableModel<
  RunInput = any,
  CallOptions extends BaseChatModelCallOptions = BaseChatModelCallOptions
> extends ChatModel<RunInput, CallOptions> {
  constructor(fields: ConfigurableModelFields);

  /**
   * Bind tools to the model
   */
  bindTools(
    tools: Tool[],
    options?: BindToolsOptions
  ): ConfigurableModel<RunInput, CallOptions>;

  /**
   * Add structured output support
   */
  withStructuredOutput<T>(
    schema: ZodType<T> | Record<string, any>,
    options?: StructuredOutputOptions
  ): Runnable<RunInput, T>;

  /**
   * Bind configuration
   */
  withConfig(config: RunnableConfig): ConfigurableModel<RunInput, CallOptions>;

  /**
   * Invoke the model
   */
  invoke(input: RunInput, options?: CallOptions): Promise<AIMessage>;

  /**
   * Stream model responses
   */
  stream(
    input: RunInput,
    options?: CallOptions
  ): AsyncGenerator<AIMessageChunk>;

  /**
   * Batch invoke
   */
  batch(inputs: RunInput[], options?: CallOptions): Promise<AIMessage[]>;

  /**
   * Transform generator
   */
  transform(
    generator: AsyncGenerator<RunInput>,
    options?: CallOptions
  ): AsyncGenerator<AIMessageChunk>;

  /**
   * Stream with logging
   */
  streamLog(
    input: RunInput,
    options?: CallOptions
  ): AsyncGenerator<RunStreamLog>;

  /**
   * Stream events
   */
  streamEvents(
    input: RunInput,
    options?: CallOptions
  ): AsyncGenerator<StreamEvent>;

  /**
   * Get model profile information
   */
  get profile(): ModelProfile;
}

interface ConfigurableModelFields {
  /**
   * Model identifier or instance
   */
  model?: string | ChatModel;

  /**
   * Sampling temperature
   */
  temperature?: number;

  /**
   * Model name override
   */
  modelName?: string;

  /**
   * Request timeout
   */
  timeout?: number;

  /**
   * Maximum tokens to generate
   */
  maxTokens?: number;

  /**
   * Enable streaming
   */
  streaming?: boolean;

  /**
   * Top-p sampling
   */
  topP?: number;

  /**
   * Frequency penalty
   */
  frequencyPenalty?: number;

  /**
   * Presence penalty
   */
  presencePenalty?: number;

  /**
   * Stop sequences
   */
  stop?: string[];

  /**
   * Provider-specific fields
   */
  [key: string]: any;
}

interface ConfigurableChatModelCallOptions extends BaseChatModelCallOptions {
  /**
   * Override model at call time
   */
  model?: string;
  [key: string]: any;
}
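
A short sketch of the call-time override above: initialize a configurable model without pinning a provider, then choose the model per invocation (behavior as described by ConfigurableChatModelCallOptions; the exact runtime semantics are an assumption):

import { initChatModel } from "langchain";

// No model fixed at construction time; it is supplied per call instead
const configurable = initChatModel();

// Route the same prompt to two different providers at call time
const cheap = await configurable.invoke(
  [{ role: "user", content: "One-line summary of HTTP/2, please." }],
  { model: "openai:gpt-4o-mini" }
);

const strong = await configurable.invoke(
  [{ role: "user", content: "One-line summary of HTTP/2, please." }],
  { model: "anthropic:claude-3-5-sonnet-20241022" }
);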

Provider Configuration

/**
 * Provider configuration map
 */
const MODEL_PROVIDER_CONFIG: Record<
  ChatModelProvider,
  {
    packageName: string;
    className: string;
    defaultModelName?: string;
  }
>;

Helper Functions

/**
 * Get model class by class name
 * @param className - Model class name (e.g., "ChatOpenAI")
 * @returns Model class constructor
 */
function getChatModelByClassName(
  className: string
): typeof ChatModel | undefined;

/**
 * Infer provider from model name
 * @param modelName - Model name string
 * @returns Inferred provider name or undefined
 * @internal
 */
function _inferModelProvider(modelName: string): ChatModelProvider | undefined;
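
A brief illustration of how these helpers relate (the import path and return values are assumptions based on the signatures above; _inferModelProvider is marked internal and may not be exported publicly):

// Hypothetical usage; exact exports may differ
import { getChatModelByClassName, _inferModelProvider } from "langchain";

// Look up a model class constructor by its class name
const ChatOpenAIClass = getChatModelByClassName("ChatOpenAI"); // constructor or undefined

// Infer the provider prefix from a bare model name
const provider = _inferModelProvider("gpt-4o"); // expected to be "openai"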

Provider-Specific Details

OpenAI

Model Identifiers:

  • openai:gpt-4o - Most capable GPT-4 model
  • openai:gpt-4o-mini - Faster, cheaper GPT-4 variant
  • openai:gpt-4-turbo - GPT-4 Turbo (previous generation)
  • openai:gpt-3.5-turbo - Legacy GPT-3.5

Specific Configuration:

interface OpenAIFields extends InitChatModelFields {
  /**
   * Organization ID for OpenAI API
   */
  organizationId?: string;

  /**
   * OpenAI API key (or use OPENAI_API_KEY env var)
   */
  openAIApiKey?: string;

  /**
   * Custom base URL for OpenAI-compatible APIs
   */
  configuration?: {
    baseURL?: string;
  };

  /**
   * Maximum completion tokens
   */
  maxTokens?: number;

  /**
   * Temperature (0-2, default 1)
   */
  temperature?: number;

  /**
   * Top P sampling (0-1)
   */
  topP?: number;

  /**
   * Frequency penalty (-2 to 2)
   */
  frequencyPenalty?: number;

  /**
   * Presence penalty (-2 to 2)
   */
  presencePenalty?: number;

  /**
   * Response format for JSON mode
   */
  responseFormat?: { type: "json_object" | "text" };

  /**
   * Seed for deterministic sampling
   */
  seed?: number;
}
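
As an illustration of the OpenAI-specific fields, a sketch enabling JSON mode with a fixed seed (values are placeholders, not a recommended configuration):

import { initChatModel } from "langchain";

// JSON mode with a fixed seed for more reproducible, machine-readable output
const jsonModel = initChatModel("openai:gpt-4o", {
  temperature: 0,
  responseFormat: { type: "json_object" },
  seed: 42,
  // Uncomment to target an OpenAI-compatible endpoint instead of api.openai.com
  // configuration: { baseURL: "https://your-proxy.example.com/v1" },
});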

Features:

  • Native tool calling support
  • Native structured output (JSON mode)
  • Vision (image understanding)
  • Function calling
  • Streaming support
  • Logprobs available

Anthropic

Model Identifiers:

  • anthropic:claude-3-5-sonnet-20241022 - Latest Sonnet (most capable)
  • anthropic:claude-3-5-haiku-20241022 - Latest Haiku (fastest)
  • anthropic:claude-3-opus-20240229 - Claude 3 Opus (legacy)

Specific Configuration:

interface AnthropicFields extends InitChatModelFields {
  /**
   * Anthropic API key (or use ANTHROPIC_API_KEY env var)
   */
  anthropicApiKey?: string;

  /**
   * Maximum tokens to generate
   */
  maxTokens?: number;

  /**
   * Temperature (0-1, default 1)
   */
  temperature?: number;

  /**
   * Top P sampling (0-1)
   */
  topP?: number;

  /**
   * Top K sampling
   */
  topK?: number;

  /**
   * Custom stop sequences
   */
  stopSequences?: string[];
}
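
For example, a sketch using the fields above (the API key can also come from the ANTHROPIC_API_KEY environment variable; the stop sequence is illustrative):

import { initChatModel } from "langchain";

const claude = initChatModel("anthropic:claude-3-5-sonnet-20241022", {
  maxTokens: 2048,
  temperature: 0.5,
  topK: 40,
  stopSequences: ["\n\nHuman:"], // custom stop sequence (illustrative)
});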

Features:

  • Native tool calling support
  • Extended context windows (up to 200K tokens)
  • Vision (image understanding)
  • Prompt caching (cost optimization)
  • Streaming support
  • No native JSON mode (use tool strategy)

Google (Vertex AI / Generative AI)

Model Identifiers:

  • google:gemini-1.5-pro - Most capable Gemini model
  • google:gemini-1.5-flash - Faster Gemini variant
  • google:gemini-1.0-pro - Legacy Gemini 1.0

Specific Configuration:

interface GoogleFields extends InitChatModelFields {
  /**
   * Google API key (for Generative AI)
   */
  apiKey?: string;

  /**
   * Project ID (for Vertex AI)
   */
  projectId?: string;

  /**
   * Location (for Vertex AI)
   */
  location?: string;

  /**
   * Maximum output tokens
   */
  maxOutputTokens?: number;

  /**
   * Temperature (0-2)
   */
  temperature?: number;

  /**
   * Top P sampling
   */
  topP?: number;

  /**
   * Top K sampling
   */
  topK?: number;

  /**
   * Safety settings
   */
  safetySettings?: Array<{
    category: string;
    threshold: string;
  }>;
}
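
For example, a Vertex AI-flavored sketch (project, location, and safety values are placeholders following the shapes above):

import { initChatModel } from "langchain";

const gemini = initChatModel("google:gemini-1.5-pro", {
  projectId: "my-gcp-project", // Vertex AI project (placeholder)
  location: "us-central1", // Vertex AI region (placeholder)
  maxOutputTokens: 2048,
  safetySettings: [
    { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_MEDIUM_AND_ABOVE" },
  ],
});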

Features:

  • Native tool calling (function calling)
  • Large context windows (up to 1M tokens)
  • Vision and multimodal understanding
  • Safety filters
  • Streaming support

Local Models (Ollama)

Model Identifiers:

  • ollama:llama3.1 - Meta's Llama 3.1
  • ollama:mistral - Mistral AI models
  • ollama:phi3 - Microsoft's Phi-3
  • ollama:codellama - Code-specialized Llama

Specific Configuration:

interface OllamaFields extends InitChatModelFields {
  /**
   * Base URL for Ollama server (default: http://localhost:11434)
   */
  baseUrl?: string;

  /**
   * Model name in Ollama
   */
  model?: string;

  /**
   * Temperature (0-1)
   */
  temperature?: number;

  /**
   * Number of tokens to predict
   */
  numPredict?: number;

  /**
   * Top K sampling
   */
  topK?: number;

  /**
   * Top P sampling
   */
  topP?: number;

  /**
   * Repeat penalty
   */
  repeatPenalty?: number;
}
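
For example, pointing at a non-default Ollama server (a sketch; the URL is a placeholder):

import { initChatModel } from "langchain";

const local = initChatModel("ollama:llama3.1", {
  baseUrl: "http://192.168.1.20:11434", // remote Ollama host (placeholder)
  temperature: 0.7,
  numPredict: 512, // cap the number of generated tokens
  repeatPenalty: 1.1,
});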

Features:

  • Fully local execution (no API key required)
  • Free to use
  • Multiple model options
  • Tool calling support (limited)
  • No native structured output
  • Streaming support

Advanced Model Patterns

Model Selection Based on Task

import { initChatModel, createAgent } from "langchain";

function selectModelForTask(taskType: string) {
  switch (taskType) {
    case "simple":
      return initChatModel("openai:gpt-4o-mini", {
        temperature: 0.3,
        maxTokens: 500,
      });
    case "complex":
      return initChatModel("openai:gpt-4o", {
        temperature: 0.7,
        maxTokens: 4000,
      });
    case "creative":
      return initChatModel("anthropic:claude-3-5-sonnet-20241022", {
        temperature: 1.0,
        maxTokens: 4000,
      });
    case "local":
      return initChatModel("ollama:llama3.1");
    default:
      return initChatModel("openai:gpt-4o");
  }
}

const agent = createAgent({
  model: selectModelForTask("complex"),
  tools: [],
});

Dynamic Model Configuration

import { initChatModel, createAgent } from "langchain";

const agent = createAgent({
  model: (state) => {
    // Select model based on state
    const messageCount = state.messages.length;

    if (messageCount > 50) {
      // Use cheaper model for long conversations
      return initChatModel("openai:gpt-4o-mini");
    }

    // Use powerful model for short conversations
    return initChatModel("openai:gpt-4o");
  },
  tools: [],
});

Model with Custom Base URL

import { initChatModel } from "langchain";

// Use OpenAI-compatible API (e.g., Azure, LocalAI, etc.)
const model = initChatModel("openai:gpt-4", {
  configuration: {
    baseURL: "https://your-custom-endpoint.com/v1",
  },
  openAIApiKey: "your-api-key",
});

Model Response Caching

import { initChatModel, createAgent, anthropicPromptCachingMiddleware } from "langchain";

// For Anthropic models, use prompt caching middleware
const agent = createAgent({
  model: "anthropic:claude-3-5-sonnet-20241022",
  tools: [],
  systemPrompt: "You are a helpful assistant with extensive knowledge.",
  middleware: [
    anthropicPromptCachingMiddleware({
      cacheSystemPrompt: true, // Cache the system prompt
      cacheMessages: true, // Cache conversation history
    }),
  ],
});

// Significant cost savings on repeated calls with same context

Multi-Model Agent

import { initChatModel, createAgent } from "langchain";

// Create multiple models for different purposes
const fastModel = initChatModel("openai:gpt-4o-mini");
const powerfulModel = initChatModel("openai:gpt-4o");
const localModel = initChatModel("ollama:llama3.1");

// Use different models in different contexts
async function processTask(task: string, complexity: "low" | "high", online: boolean) {
  let model;

  if (!online) {
    model = localModel;
  } else if (complexity === "low") {
    model = fastModel;
  } else {
    model = powerfulModel;
  }

  const agent = createAgent({ model, tools: [] });
  return await agent.invoke({ messages: [{ role: "user", content: task }] });
}

Performance Optimization

Temperature Guidelines

Low Temperature (0 - 0.3):

  • More deterministic and focused responses
  • Good for: factual answers, code generation, structured data extraction
  • Less creative, more consistent

Medium Temperature (0.4 - 0.7):

  • Balanced between creativity and consistency
  • Good for: general conversation, problem-solving, analysis
  • Default for most use cases

High Temperature (0.8 - 2.0):

  • More random and creative responses
  • Good for: creative writing, brainstorming, diverse perspectives
  • Less predictable, more varied
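
The ranges above map onto configuration roughly as follows (a sketch reusing models shown elsewhere in this document):

import { initChatModel } from "langchain";

// Deterministic extraction and code generation
const extractor = initChatModel("openai:gpt-4o-mini", { temperature: 0.1 });

// General-purpose conversation and analysis
const assistant = initChatModel("openai:gpt-4o", { temperature: 0.6 });

// Brainstorming and creative writing
const writer = initChatModel("anthropic:claude-3-5-sonnet-20241022", { temperature: 1.0 });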

Token Optimization

Max Tokens:

  • Set appropriate maxTokens to control response length
  • Lower values for concise responses (save cost)
  • Higher values for detailed analysis
  • Consider model's context window limits

Context Window Management:

  • Trim message history for long conversations (see the sketch after this list)
  • Use summarization for context compression
  • Remove redundant messages
  • Consider models with larger context windows for complex tasks
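
For example, the history-trimming strategy can be as simple as keeping the system message plus the most recent turns before handing messages to the model (a manual sketch; the threshold is illustrative):

// Keep the system message (if any) and the last N conversation messages
function trimHistory(
  messages: { role: string; content: string }[],
  keepLast = 20
) {
  const system = messages.filter((m) => m.role === "system");
  const rest = messages.filter((m) => m.role !== "system");
  return [...system, ...rest.slice(-keepLast)];
}

// Usage: trim before invoking an agent on a long-running conversation
// const result = await agent.invoke({ messages: trimHistory(history) });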

Model Selection: Cost vs. Quality

Cost Tiers (Approximate):

  1. Free/Cheap: Ollama (local), GPT-4o-mini, Claude Haiku
  2. Medium: GPT-4o, Gemini 1.5 Pro
  3. Expensive: Claude Opus, GPT-4 Turbo

Quality Considerations:

  • Cheaper models are often sufficient for simple tasks
  • Use expensive models only when necessary
  • Batch simple requests to cheaper models
  • Cache results when possible

Error Handling

Model-Specific Errors

Rate Limiting:

import { initChatModel, createAgent, modelRetryMiddleware } from "langchain";

const agent = createAgent({
  model: "openai:gpt-4o",
  tools: [],
  middleware: [
    modelRetryMiddleware({
      maxRetries: 3,
      initialDelay: 1000, // Start with 1s delay
      maxDelay: 10000, // Max 10s delay
      backoffFactor: 2, // Exponential backoff
    }),
  ],
});

Model Fallback:

import { initChatModel, createAgent, modelFallbackMiddleware } from "langchain";

const agent = createAgent({
  model: "openai:gpt-4o",
  tools: [],
  middleware: [
    modelFallbackMiddleware({
      fallbackModels: [
        "openai:gpt-4o-mini", // Try cheaper model
        "anthropic:claude-3-5-haiku-20241022", // Try different provider
        "ollama:llama3.1", // Try local model
      ],
    }),
  ],
});

Authentication Errors

Environment Variables:

# OpenAI
export OPENAI_API_KEY="sk-..."

# Anthropic
export ANTHROPIC_API_KEY="sk-ant-..."

# Google
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/credentials.json"
export GOOGLE_API_KEY="..." # For Generative AI

# Cohere
export COHERE_API_KEY="..."

# Groq
export GROQ_API_KEY="gsk_..."

Programmatic Configuration:

import { initChatModel } from "langchain";

const model = initChatModel("openai:gpt-4o", {
  openAIApiKey: process.env.OPENAI_API_KEY,
  timeout: 30000, // 30 second timeout
  maxRetries: 3, // Retry failed requests
});

Model Comparison

| Provider | Best For | Context Window | Tool Support | Vision | Cost |
| --- | --- | --- | --- | --- | --- |
| OpenAI GPT-4o | General purpose | 128K tokens | Native | Yes | $$$ |
| OpenAI GPT-4o-mini | Simple tasks | 128K tokens | Native | Yes | $ |
| Claude 3.5 Sonnet | Complex reasoning | 200K tokens | Native | Yes | $$$ |
| Claude 3.5 Haiku | Fast responses | 200K tokens | Native | Yes | $ |
| Gemini 1.5 Pro | Large context | 1M tokens | Native | Yes | $$ |
| Gemini 1.5 Flash | Speed + context | 1M tokens | Native | Yes | $ |
| Ollama (Local) | Privacy, offline | Model-dependent | Limited | Some | Free |

Related Documentation

  • Model Guide - Complete usage guide
  • Agent API Reference - Using models with agents
  • Quick Reference - Quick model patterns
  • Performance Optimization - Optimization strategies