AWS Bedrock integration for LangChain4j, enabling Java applications to interact with various LLM providers through a unified interface.
Optimization strategies for effective caching.
Ensure cached content exceeds ~1,024 tokens for activation.
// Too small: fewer than ~1,024 tokens, so the cache will not activate.
BedrockSystemMessage tooSmall =
        BedrockSystemMessage.builder()
                .addTextWithCachePoint("Short text")
                .build();
// Good - large enough to cache
BedrockSystemMessage goodSize = BedrockSystemMessage.builder()
.addTextWithCachePoint(loadLargeContent()) // >1024 tokens - will cache
.build();

Minimize dynamic content in cached sections.
// Bad: a changing value inside the cached text invalidates the entire cache.
BedrockSystemMessage badCaching =
        BedrockSystemMessage.builder()
                // LocalDate.now() differs each day, so this cached text changes daily.
                .addTextWithCachePoint(staticContent + "\nDate: " + LocalDate.now())
                .build();
// Good: Separate static and dynamic
BedrockSystemMessage goodCaching = BedrockSystemMessage.builder()
.addTextWithCachePoint(staticContent) // Cached
.addText("\nDate: " + LocalDate.now()) // Not cached
.build();

Keep the cache warm: each cache hit resets the 5-minute TTL.
// Request at T+0min: Cache write
// Request at T+4min: Cache hit (TTL reset to T+9min)
// Request at T+8min: Cache hit (TTL reset to T+13min)
// Request at T+15min: Cache miss (no requests since T+8min, so the entry expired at T+13min)

Only Claude 3.x and Amazon Nova support caching.
// Model IDs treated as cache-capable in this example.
// NOTE(review): confirm against the current Bedrock prompt-caching supported-models table.
List<String> cacheSupportedModels = List.of(
        // Anthropic Claude 3.x
        "anthropic.claude-3-5-sonnet-20241022-v2:0",
        "anthropic.claude-3-5-sonnet-20240620-v1:0",
        "anthropic.claude-3-opus-20240229-v1:0",
        "anthropic.claude-3-sonnet-20240229-v1:0",
        "anthropic.claude-3-haiku-20240307-v1:0",
        // Amazon Nova
        "amazon.nova-pro-v1:0",
        "amazon.nova-lite-v1:0");

// Model to look up in the list before enabling caching.
String modelId = "anthropic.claude-3-5-sonnet-20241022-v2:0";
if (cacheSupportedModels.contains(modelId)) {
// Enable caching
}

// Cost calculation:
// Regular input: 10,000 tokens = 10,000 token cost
// Cache write: 10,000 tokens = 10,000 input + write cost
// Cache read: 10,000 tokens = 1,000 token cost (90% savings)
//
// Break-even: 1 cache write + 2-3 cache reads

Track cache performance via token metrics.
import dev.langchain4j.model.bedrock.BedrockTokenUsage;
ChatResponse response = model.chat(request);
if (response.tokenUsage() instanceof BedrockTokenUsage usage) {
// The cache counters are boxed Integers, so they can be null as well as zero.
Integer cacheWrite = usage.cacheWriteInputTokens();
Integer cacheRead = usage.cacheReadInputTokens();
// Value reported by inputTokenCount(); not used in the output below.
Integer regularInput = usage.inputTokenCount();

// Report a write only when tokens were actually written to the cache.
if (cacheWrite != null && cacheWrite > 0) {
    System.out.println("Cache write: " + cacheWrite + " tokens");
}
// A zero read count is a miss, not a hit, so require a positive value
// before announcing savings.
if (cacheRead != null && cacheRead > 0) {
    // Cached reads are billed at ~10% of regular input, i.e. ~90% saved.
    double savings = cacheRead * 0.9;
    System.out.println("Cache hit! Saved ~" + savings + " tokens");
}
}

Reuse configurations to avoid repeated setup.
// Build each configuration a single time and share it across requests.

/** Guardrail settings: identifier "my-guardrail", version "1". */
private static final BedrockGuardrailConfiguration GUARDRAIL = BedrockGuardrailConfiguration.builder()
        .guardrailIdentifier("my-guardrail")
        .guardrailVersion("1")
        .build();

/** Request parameters combining AFTER_SYSTEM prompt caching with the shared guardrail. */
private static final BedrockChatRequestParameters CACHED_PARAMS = BedrockChatRequestParameters.builder()
        .promptCaching(BedrockCachePointPlacement.AFTER_SYSTEM)
        .guardrailConfiguration(GUARDRAIL)
        .build();
BedrockChatModel model = BedrockChatModel.builder()
.modelId("anthropic.claude-3-5-sonnet-20241022-v2:0")
.defaultRequestParameters(CACHED_PARAMS)
.build();

Related: