AWS Bedrock integration for LangChain4j, enabling Java applications to interact with various LLM providers through a unified interface.
AWS Bedrock prompt caching reduces latency and costs by caching frequently used content.
Automatic cache point insertion at predefined locations. Best for entirely static content.
// Where the library automatically inserts a cache point into the Bedrock request.
public enum BedrockCachePointPlacement {
AFTER_SYSTEM, // Cache point inserted after the system messages
AFTER_USER_MESSAGE, // Cache point inserted after the first user message
AFTER_TOOLS // Cache point inserted after the tool definitions
}

Use when:
Fine-grained control over which content blocks are cached. Best for mixed static/dynamic content.
// Bedrock-specific system message whose content is split into blocks, each of
// which may be followed by a cache point. (API skeleton; method bodies elided.)
public class BedrockSystemMessage implements ChatMessage {
// Maximum number of content blocks per message — NOTE(review): presumably a Bedrock API limit; confirm
public static final int MAX_CONTENT_BLOCKS = 10;
// Maximum number of cache points per message — NOTE(review): presumably a Bedrock API limit; confirm
public static final int MAX_CACHE_POINTS = 4;
// The ordered content blocks of this message.
public List<BedrockSystemContent> contents();
// Whether any content block is marked with a cache point.
public boolean hasCachePoints();
// Number of cache points present in this message.
public int cachePointCount();
// Fluent builder for mixing cached and uncached content blocks.
public static Builder builder();
// Convenience factory creating a message from a single text block.
public static BedrockSystemMessage from(String text);
}

Use when:
// Example: automatic cache point placement after the system messages.
import dev.langchain4j.model.bedrock.BedrockChatModel;
import dev.langchain4j.model.bedrock.BedrockChatRequestParameters;
import dev.langchain4j.model.bedrock.BedrockCachePointPlacement;
// Request parameters that insert a cache point after the system messages.
BedrockChatRequestParameters params = BedrockChatRequestParameters.builder()
.promptCaching(BedrockCachePointPlacement.AFTER_SYSTEM)
.build();
// Model configured so the caching parameters apply to every request by default.
BedrockChatModel model = BedrockChatModel.builder()
.modelId("anthropic.claude-3-5-sonnet-20241022-v2:0")
.defaultRequestParameters(params)
.build();
// Large system prompt (>1024 tokens) — NOTE(review): the minimum cacheable prompt size varies by model; confirm against the Bedrock docs
String systemPrompt = loadLargePrompt();
// First request: cache write (the system prompt is stored in the cache)
ChatResponse response1 = model.chat(ChatRequest.builder()
.messages(SystemMessage.from(systemPrompt), UserMessage.from("Question 1"))
.build());
// Second request: cache hit (within 5 minutes)
ChatResponse response2 = model.chat(ChatRequest.builder()
.messages(SystemMessage.from(systemPrompt), UserMessage.from("Question 2"))
.build());

import dev.langchain4j.model.bedrock.BedrockSystemMessage;
// Example: fine-grained cache points for mixed static/dynamic system content.
// Mix static and dynamic content
BedrockSystemMessage message = BedrockSystemMessage.builder()
.addTextWithCachePoint(loadStaticKnowledgeBase()) // Static: cache it
.addText("Current date: " + LocalDate.now()) // Dynamic: don't cache
.addTextWithCachePoint(loadStaticInstructions()) // Static: cache it
.build();
// Send the cache-aware system message alongside a regular user message.
ChatResponse response = model.chat(ChatRequest.builder()
.messages(message, UserMessage.from("Question"))
.build());

import dev.langchain4j.model.bedrock.BedrockTokenUsage;
// Example: inspecting cache activity via Bedrock's token-usage subtype.
ChatResponse response = model.chat(request);
// Pattern-matching instanceof (Java 16+): Bedrock reports cache metrics on
// its own TokenUsage subtype, so downcast before reading them.
if (response.tokenUsage() instanceof BedrockTokenUsage usage) {
// Both counts are guarded for null below, so they may be absent —
// NOTE(review): presumably null when no cache write/read occurred; confirm.
Integer cacheWrite = usage.cacheWriteInputTokens();
Integer cacheRead = usage.cacheReadInputTokens();
if (cacheWrite != null) {
System.out.println("Cache write: " + cacheWrite + " tokens");
}
if (cacheRead != null) {
System.out.println("Cache hit: " + cacheRead + " tokens");
}
}