The Messages API is the core interface for conversational interactions with Claude models. It supports text, images, documents, tool use, thinking, prompt caching, and both streaming and non-streaming modes.
The Messages resource exposes the following methods and sub-resources:
class Messages extends APIResource {
// Sub-resources
batches: Batches;
// Methods
create(params: MessageCreateParamsNonStreaming, options?: RequestOptions): APIPromise<Message>;
create(params: MessageCreateParamsStreaming, options?: RequestOptions): APIPromise<Stream<RawMessageStreamEvent>>;
stream(params: MessageStreamParams, options?: RequestOptions): MessageStream;
countTokens(params: MessageCountTokensParams, options?: RequestOptions): APIPromise<MessageTokensCount>;
}

client.messages.create(params: MessageCreateParams): APIPromise<Message>;
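Which of the two create() overloads applies depends on the stream flag; a brief sketch (model id reused from the examples below):

// stream omitted (or false): resolves to a complete Message
const whole = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 256,
  messages: [{ role: 'user', content: 'Hi' }],
});

// stream: true resolves to an async-iterable Stream of events
const events = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 256,
  stream: true,
  messages: [{ role: 'user', content: 'Hi' }],
});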
interface MessageCreateParams {
model: string; // Required: Model identifier
max_tokens: number; // Required: Maximum tokens to generate
messages: MessageParam[]; // Required: Conversation messages
// Optional parameters
stream?: boolean; // Enable streaming
system?: string | SystemBlockParam[]; // System prompt
temperature?: number; // 0-1, default varies by model
top_k?: number; // Top-k sampling
top_p?: number; // 0-1, nucleus sampling
stop_sequences?: string[]; // Stop generation on these sequences
metadata?: Metadata; // User-defined metadata
tools?: Tool[]; // Available tools
tool_choice?: ToolChoice; // Tool selection strategy
thinking?: ThinkingConfigParam; // Extended thinking configuration
// Note: citations are enabled per document content block, not top-level (see Citations below)
}

Example:
import Anthropic from '@anthropic-ai/sdk';
const client = new Anthropic();
const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: [
{
role: 'user',
content: 'What is the capital of France?'
}
],
});
console.log(message.content[0].text); // "The capital of France is Paris."const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: [
{
role: 'user',
content: 'Hello, my name is Alice.'
},
{
role: 'assistant',
content: 'Hello Alice! Nice to meet you. How can I help you today?'
},
{
role: 'user',
content: 'What did I just tell you my name was?'
}
],
});
console.log(message.content[0].text); // "You told me your name is Alice."System prompts guide Claude's behavior:
const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
system: 'You are a helpful assistant that speaks like a pirate.',
messages: [
{
role: 'user',
content: 'Tell me about the weather.'
}
],
});

Advanced system prompts can attach cache control to individual blocks:
const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
system: [
{
type: 'text',
text: 'You are an expert in quantum physics.',
},
{
type: 'text',
text: 'Here is a large corpus of physics papers...',
cache_control: { type: 'ephemeral' }, // Cache this content
}
],
messages: [
{
role: 'user',
content: 'Explain quantum entanglement.'
}
],
});

interface MessageParam {
role: 'user' | 'assistant';
content: string | ContentBlockParam[];
}
// Simple text content
const simple: MessageParam = {
role: 'user',
content: 'Hello!',
};
// Structured content blocks
const structured: MessageParam = {
role: 'user',
content: [{ type: 'text', text: 'Hello!' }],
};

interface Message {
id: string; // Unique message ID
type: 'message';
role: 'assistant';
content: ContentBlock[]; // Generated content blocks
model: string; // Model that generated response
stop_reason: StopReason | null;
stop_sequence: string | null;
usage: Usage; // Token usage information
_request_id?: string; // Request ID for debugging
}
type StopReason =
| 'end_turn' // Natural completion
| 'max_tokens' // Reached max_tokens limit
| 'stop_sequence' // Hit a stop sequence
| 'tool_use' // Wants to use a tool
| 'server_tool_use' // Server-side tool execution
;
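stop_reason tells you why generation ended; a small sketch of branching on it, assuming message is the result of an earlier create() call:

switch (message.stop_reason) {
  case 'end_turn':
    console.log('Completed naturally');
    break;
  case 'max_tokens':
    console.warn('Truncated: raise max_tokens or continue the conversation');
    break;
  case 'stop_sequence':
    console.log('Matched stop sequence:', message.stop_sequence);
    break;
  case 'tool_use':
    // Execute the requested tool and send back a tool_result (see Tool Use below)
    break;
}
console.log('Tokens used:', message.usage.input_tokens, message.usage.output_tokens);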
interface Usage {
input_tokens: number;
output_tokens: number;
cache_creation_input_tokens?: number; // Tokens cached (first use)
cache_read_input_tokens?: number; // Tokens read from cache
}

type ContentBlockParam =
| TextBlockParam
| ImageBlockParam
| DocumentBlockParam
| ToolUseBlockParam
| ToolResultBlockParam
| ThinkingBlockParam
| RedactedThinkingBlockParam
| ServerToolUseBlockParam
| SearchResultBlockParam
| WebSearchResultBlockParam
;
// Text content
interface TextBlockParam {
type: 'text';
text: string;
cache_control?: CacheControlEphemeral;
citations?: TextCitationParam[];
}
// Image content
interface ImageBlockParam {
type: 'image';
source: Base64ImageSource | URLImageSource;
cache_control?: CacheControlEphemeral;
}
interface Base64ImageSource {
type: 'base64';
media_type: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp';
data: string; // Base64-encoded image
}
interface URLImageSource {
type: 'url';
url: string;
}
// Document content (PDFs, text files)
interface DocumentBlockParam {
type: 'document';
source: Base64PDFSource | URLPDFSource | PlainTextSource;
cache_control?: CacheControlEphemeral;
}
interface Base64PDFSource {
type: 'base64';
media_type: 'application/pdf';
data: string; // Base64-encoded PDF
}
interface URLPDFSource {
type: 'url';
url: string;
media_type: 'application/pdf';
}
interface PlainTextSource {
type: 'text';
media_type: 'text/plain';
data: string;
}
// Tool use request (from assistant)
interface ToolUseBlockParam {
type: 'tool_use';
id: string;
name: string;
input: Record<string, any>;
cache_control?: CacheControlEphemeral;
}
// Tool result (from user)
interface ToolResultBlockParam {
type: 'tool_result';
tool_use_id: string;
content?: string | ContentBlockParam[];
is_error?: boolean;
cache_control?: CacheControlEphemeral;
}

type ContentBlock =
| TextBlock
| ThinkingBlock
| RedactedThinkingBlock
| ToolUseBlock
| ServerToolUseBlock
| WebSearchResultBlock
;
interface TextBlock {
type: 'text';
text: string;
citations?: TextCitation[];
}
interface ThinkingBlock {
type: 'thinking';
thinking: string;
signature?: string;
}
interface RedactedThinkingBlock {
type: 'redacted_thinking';
signature: string;
}
interface ToolUseBlock {
type: 'tool_use';
id: string;
name: string;
input: Record<string, any>;
}
interface ServerToolUseBlock {
type: 'server_tool_use';
id: string;
name: string;
input: Record<string, any>;
}
interface WebSearchResultBlock {
type: 'web_search_result';
query: string;
results: Array<{
url: string;
title: string;
snippet: string;
}>;
}

Images can be included as base64-encoded data:

const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: [
{
role: 'user',
content: [
{
type: 'image',
source: {
type: 'base64',
media_type: 'image/jpeg',
data: '/9j/4AAQSkZJRgABAQAA...', // Base64 image data
},
},
{
type: 'text',
text: 'What is in this image?',
},
],
},
],
});

From a URL:
const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: [
{
role: 'user',
content: [
{
type: 'image',
source: {
type: 'url',
url: 'https://example.com/image.jpg',
},
},
{
type: 'text',
text: 'Describe this image.',
},
],
},
],
});

PDF documents can be attached the same way:

import fs from 'fs';
const pdfData = fs.readFileSync('document.pdf', 'base64');
const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: [
{
role: 'user',
content: [
{
type: 'document',
source: {
type: 'base64',
media_type: 'application/pdf',
data: pdfData,
},
},
{
type: 'text',
text: 'Summarize this document.',
},
],
},
],
});

Control randomness and creativity:
// More creative (higher randomness)
const creative = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
temperature: 1.0, // Range: 0-1
messages: [
{
role: 'user',
content: 'Write a creative story about a robot.',
}
],
});
// More focused (lower randomness)
const focused = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
temperature: 0.2,
top_p: 0.9, // Nucleus sampling
messages: [
{
role: 'user',
content: 'What is 2+2?',
}
],
});

Stop generation at specific strings:
const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
stop_sequences: ['\n\nHuman:', 'END'],
messages: [
{
role: 'user',
content: 'Count from 1 to 10, one per line.',
}
],
});
// Will stop if it generates '\n\nHuman:' or 'END'
if (message.stop_reason === 'stop_sequence') {
console.log('Stopped at:', message.stop_sequence);
}

Attach metadata for request tracking; the metadata object currently supports a user_id field:
const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
metadata: {
user_id: 'user_123',
},
messages: [
{
role: 'user',
content: 'Hello!',
}
],
});

Cache frequently used content to reduce latency and costs:
// First request - creates cache
const message1 = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
system: [
{
type: 'text',
text: 'You are an expert legal analyst.',
},
{
type: 'text',
text: largeDocumentText, // Large document
cache_control: {
type: 'ephemeral',
ttl: '5m', // or '1h'
},
},
],
messages: [
{
role: 'user',
content: 'Analyze section 1.',
}
],
});
console.log(message1.usage);
// {
// input_tokens: 1000,
// cache_creation_input_tokens: 5000, // Cached
// output_tokens: 500
// }
// Second request - uses cache
const message2 = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
system: [
{
type: 'text',
text: 'You are an expert legal analyst.',
},
{
type: 'text',
text: largeDocumentText, // Same content
cache_control: { type: 'ephemeral', ttl: '5m' },
},
],
messages: [
{
role: 'user',
content: 'Analyze section 2.', // Different question
}
],
});
console.log(message2.usage);
// {
// input_tokens: 1000,
// cache_read_input_tokens: 5000, // Read from cache
// output_tokens: 500
// }

Cache TTL options:
- 5m: 5 minutes (default)
- 1h: 1 hour

Caching applies to any block that accepts cache_control: system prompt blocks, tool definitions, and message content blocks (text, images, documents, tool use, and tool results), as sketched below.
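For example, placing cache_control on the last content block of a long conversation caches the entire prefix up to that block; a minimal sketch (contract text elided):

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  messages: [
    { role: 'user', content: 'Here is the full contract text: ...' },
    { role: 'assistant', content: 'I have read the contract. What would you like to know?' },
    {
      role: 'user',
      content: [
        {
          type: 'text',
          text: 'Summarize the termination clauses.',
          cache_control: { type: 'ephemeral' }, // Caches the entire prefix up to this block
        },
      ],
    },
  ],
});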
Extended thinking gives access to Claude's reasoning process:
const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 4096,
thinking: {
type: 'enabled',
budget_tokens: 2000, // Max tokens for thinking
},
messages: [
{
role: 'user',
content: 'Solve this complex math problem: ...',
}
],
});
// Response includes thinking blocks
for (const block of message.content) {
if (block.type === 'thinking') {
console.log('Reasoning:', block.thinking);
console.log('Signature:', block.signature);
} else if (block.type === 'text') {
console.log('Answer:', block.text);
}
}

Disable thinking:
const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
thinking: {
type: 'disabled',
},
messages: [/* ... */],
});

Request citations for generated text by enabling them on the source document block:
const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: [
{
role: 'user',
content: [
{
type: 'document',
source: {
type: 'text',
media_type: 'text/plain',
data: 'The capital of France is Paris. It has a population of 2.1 million.',
},
citations: { enabled: true }, // Enable citations for this document
},
{
type: 'text',
text: 'What is the population of the capital?',
},
],
}
],
});
// Response includes citations
const textBlock = message.content[0];
if (textBlock.type === 'text' && textBlock.citations) {
for (const citation of textBlock.citations) {
console.log('Cited text:', citation.cited_text);
console.log('Document index:', citation.document_index);
}
}

Count tokens before creating a message:
client.messages.countTokens(params: MessageCountTokensParams): APIPromise<MessageTokensCount>;
interface MessageCountTokensParams {
model: string;
messages: MessageParam[];
system?: string | SystemBlockParam[];
tools?: MessageCountTokensTool[];
tool_choice?: ToolChoice;
}
interface MessageTokensCount {
input_tokens: number;
}

Example:
const tokenCount = await client.messages.countTokens({
model: 'claude-sonnet-4-5-20250929',
messages: [
{
role: 'user',
content: 'What is the weather like today?',
}
],
});
console.log('Input tokens:', tokenCount.input_tokens);
// Use count to validate before expensive operation
if (tokenCount.input_tokens < 1000) {
const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: [
{
role: 'user',
content: 'What is the weather like today?',
}
],
});
}

Access the parsed message directly:

const message = await client.messages.create({ /* ... */ });
console.log(message.content);
console.log(message.usage);

Use .withResponse() to also access the raw HTTP response:

const { data, response, request_id } = await client.messages
.create({ /* ... */ })
.withResponse();
console.log('Message:', data);
console.log('Status:', response.status);
console.log('Request ID:', request_id);
console.log('Rate limit remaining:', response.headers.get('anthropic-ratelimit-requests-remaining'));

Use .asResponse() to get the raw Response without parsing the body:

const response = await client.messages
.create({ /* ... */ })
.asResponse();
console.log('Status:', response.status);
console.log('Headers:', response.headers);
// Parse body manually if needed
const message = await response.json();

For real-time responses, use streaming mode:
const stream = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
stream: true, // Enable streaming
messages: [
{
role: 'user',
content: 'Write a haiku about programming.',
}
],
});
for await (const event of stream) {
if (event.type === 'content_block_delta') {
if (event.delta.type === 'text_delta') {
process.stdout.write(event.delta.text);
}
}
}

Or use the enhanced stream helper:
const stream = client.messages.stream({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: [
{
role: 'user',
content: 'Write a story.',
}
],
});
stream.on('text', (text) => {
console.log('Text delta:', text);
});
const message = await stream.finalMessage();
console.log('Complete message:', message);See streaming.md for complete streaming documentation.
Enable Claude to call functions:
const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
tools: [
{
name: 'get_weather',
description: 'Get the current weather in a location',
input_schema: {
type: 'object',
properties: {
location: {
type: 'string',
description: 'City and state, e.g., San Francisco, CA',
},
unit: {
type: 'string',
enum: ['celsius', 'fahrenheit'],
description: 'Temperature unit',
},
},
required: ['location'],
},
},
],
messages: [
{
role: 'user',
content: 'What is the weather in San Francisco?',
}
],
});
// Check if Claude wants to use a tool
if (message.stop_reason === 'tool_use') {
const toolUse = message.content.find(block => block.type === 'tool_use');
console.log('Tool:', toolUse.name);
console.log('Input:', toolUse.input);
// Execute tool and continue conversation
const toolResult = getWeather(toolUse.input.location);
const followUp = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
tools: [/* same tools */],
messages: [
{
role: 'user',
content: 'What is the weather in San Francisco?',
},
{
role: 'assistant',
content: message.content, // Include tool use
},
{
role: 'user',
content: [
{
type: 'tool_result',
tool_use_id: toolUse.id,
content: toolResult,
},
],
},
],
});
}

See tools.md for comprehensive tool documentation, including automatic execution with toolRunner().
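If tool execution fails, set is_error on the tool_result so Claude can recover gracefully; a minimal sketch reusing getWeather and toolUse from the example above:

let resultText: string;
let isError = false;
try {
  resultText = getWeather(toolUse.input.location);
} catch (err) {
  resultText = `Weather lookup failed: ${String(err)}`;
  isError = true;
}

const toolResultBlock = {
  type: 'tool_result' as const,
  tool_use_id: toolUse.id,
  content: resultText,
  is_error: isError, // Tells Claude the call failed
};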
Available models:
type Model =
| 'claude-opus-4-5-20251101'
| 'claude-sonnet-4-5-20250929'
| 'claude-3-5-sonnet-20241022'
| 'claude-3-5-haiku-20241022'
| 'claude-3-opus-20240229'
| 'claude-3-sonnet-20240229'
| 'claude-3-haiku-20240307'
// ... and more
;

Choose based on your needs:
- Opus: most capable; best for complex reasoning and analysis
- Sonnet: balances capability, speed, and cost
- Haiku: fastest and most cost-effective; suited to simple, high-volume tasks
Handle API errors:
try {
const message = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: [
{
role: 'user',
content: 'Hello!',
}
],
});
} catch (error) {
if (error instanceof Anthropic.APIError) {
console.error('Status:', error.status);
console.error('Message:', error.message);
console.error('Request ID:', error.requestID);
if (error.status === 429) {
// Rate limit - retry with backoff
} else if (error.status === 529) {
// Overloaded - wait and retry
}
}
throw error;
}

See errors.md for complete error documentation.
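For transient failures such as 429 (rate limit) or 529 (overloaded), a simple exponential backoff works; a minimal manual sketch (the SDK can also retry automatically when constructed with a maxRetries option):

async function createWithBackoff(
  params: Anthropic.MessageCreateParamsNonStreaming,
  attempts = 4,
): Promise<Anthropic.Message> {
  for (let i = 0; i < attempts; i++) {
    try {
      return await client.messages.create(params);
    } catch (error) {
      const retryable =
        error instanceof Anthropic.APIError &&
        (error.status === 429 || error.status === 529);
      if (!retryable || i === attempts - 1) throw error;
      await new Promise((r) => setTimeout(r, 2 ** i * 1000)); // 1s, 2s, 4s, ...
    }
  }
  throw new Error('unreachable');
}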
// ✅ Good: Maintain conversation state
const conversation: MessageParam[] = [
{ role: 'user', content: 'Hi, I am Alice.' },
];
const response1 = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: conversation,
});
// Add assistant response to conversation
conversation.push({
role: 'assistant',
content: response1.content,
});
// Continue conversation
conversation.push({
role: 'user',
content: 'What is my name?',
});
const response2 = await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
messages: conversation,
});

// ✅ Good: Check token usage
const message = await client.messages.create({ /* ... */ });
console.log('Tokens used:', message.usage.input_tokens + message.usage.output_tokens);
// ✅ Good: Use countTokens for validation
const count = await client.messages.countTokens({ /* ... */ });
if (count.input_tokens > 100000) {
console.warn('Input is very large, consider summarizing');
}

// ✅ Good: Cache expensive content
// Mark only the content you want to reuse
const baseSystem: Anthropic.TextBlockParam[] = [
{ type: 'text', text: 'You are a helpful assistant.' },
{
type: 'text',
text: largeKnowledgeBase,
cache_control: { type: 'ephemeral', ttl: '1h' }, // Cache for 1 hour
},
];
// Reuse across multiple requests
for (const question of questions) {
await client.messages.create({
model: 'claude-sonnet-4-5-20250929',
max_tokens: 1024,
system: baseSystem, // Reuses cached content
messages: [{ role: 'user', content: question }],
});
}