Java idiomatic SDK for the Gemini Developer APIs and Vertex AI APIs
Create and manage cached content to optimize repeated requests with the same context, reducing latency and token costs. Caching is useful for frequently used system instructions, large documents, or conversation starters.
import com.google.genai.Caches;
import com.google.genai.AsyncCaches;
import com.google.genai.Pager;
import com.google.genai.types.CachedContent;
import com.google.genai.types.CreateCachedContentConfig;
import com.google.genai.types.UpdateCachedContentConfig;
import com.google.genai.types.DeleteCachedContentResponse;

package com.google.genai;
public final class Caches {
public CachedContent create(CreateCachedContentConfig config);
public CachedContent get(String name, GetCachedContentConfig config);
public CachedContent update(String name, UpdateCachedContentConfig config);
public DeleteCachedContentResponse delete(String name, DeleteCachedContentConfig config);
public Pager<CachedContent> list(ListCachedContentsConfig config);
}

package com.google.genai.types;
public final class CachedContent {
public static Builder builder();
public Optional<String> name();
public Optional<String> displayName();
public Optional<String> model();
public Optional<String> createTime();
public Optional<String> updateTime();
public Optional<String> expireTime();
public Optional<List<Content>> contents();
public Optional<List<Tool>> tools();
public Optional<ToolConfig> toolConfig();
public Optional<Content> systemInstruction();
public Optional<CachedContentUsageMetadata> usageMetadata();
}

import com.google.genai.types.CreateCachedContentConfig;
import com.google.genai.types.Content;
import com.google.genai.types.Part;
// Cache a large document
Content document = Content.fromParts(
Part.fromText("Large document content that will be reused...")
);
CreateCachedContentConfig config = CreateCachedContentConfig.builder()
.model("gemini-2.0-flash")
.displayName("Cached Document")
.contents(ImmutableList.of(document))
.ttl("3600s") // Cache for 1 hour
.build();
CachedContent cached = client.caches.create(config);
System.out.println("Cached content: " + cached.name().orElse("N/A"));

Content systemInstruction = Content.fromParts(
Part.fromText("You are an expert data analyst. Analyze data carefully and provide detailed insights.")
);
CreateCachedContentConfig config = CreateCachedContentConfig.builder()
.model("gemini-2.0-flash")
.displayName("Data Analyst System")
.systemInstruction(systemInstruction)
.ttl("7200s") // 2 hours
.build();
CachedContent cached = client.caches.create(config);

// Reference cached content in generation
GenerateContentConfig genConfig = GenerateContentConfig.builder()
.cachedContent(cached.name().get())
.build();
GenerateContentResponse response = client.models.generateContent(
"gemini-2.0-flash",
"Based on the cached context, answer this question...",
genConfig
);
// Check token savings
response.usageMetadata().ifPresent(usage -> {
System.out.println("Prompt tokens: " + usage.promptTokenCount().orElse(0));
System.out.println("Cached tokens: " + usage.cachedContentTokenCount().orElse(0));
});

UpdateCachedContentConfig updateConfig = UpdateCachedContentConfig.builder()
.ttl("7200s") // Extend to 2 hours
.build();
CachedContent updated = client.caches.update(cachedName, updateConfig);

CachedContent cached = client.caches.get(cachedName, null);
System.out.println("Name: " + cached.name().orElse("N/A"));
System.out.println("Display name: " + cached.displayName().orElse("N/A"));
System.out.println("Model: " + cached.model().orElse("N/A"));
System.out.println("Expires: " + cached.expireTime().orElse("N/A"));
cached.usageMetadata().ifPresent(usage -> {
System.out.println("Total tokens: " + usage.totalTokenCount().orElse(0));
});

Pager<CachedContent> pager = client.caches.list(null);
for (CachedContent cached : pager) {
System.out.println("Cache: " + cached.displayName().orElse("N/A"));
System.out.println(" Model: " + cached.model().orElse("N/A"));
System.out.println(" Expires: " + cached.expireTime().orElse("N/A"));
}

DeleteCachedContentResponse response = client.caches.delete(cachedName, null);
System.out.println("Cached content deleted");

// Set expiration time
CreateCachedContentConfig config1 = CreateCachedContentConfig.builder()
.model("gemini-2.0-flash")
.contents(contents)
.expireTime("2024-12-31T23:59:59Z") // Specific time
.build();
// Set TTL (time to live)
CreateCachedContentConfig config2 = CreateCachedContentConfig.builder()
.model("gemini-2.0-flash")
.contents(contents)
.ttl("3600s") // 1 hour from creation
.build();

// Upload large file
File file = client.files.upload("large-document.pdf", null);
// Cache file reference
Content fileContent = Content.fromParts(
Part.fromUri(file.uri().get(), file.mimeType().get())
);
CreateCachedContentConfig config = CreateCachedContentConfig.builder()
.model("gemini-2.0-flash")
.contents(ImmutableList.of(fileContent))
.ttl("3600s")
.build();
CachedContent cached = client.caches.create(config);
// Use cached content multiple times
for (String query : queries) {
GenerateContentResponse response = client.models.generateContent(
"gemini-2.0-flash",
query,
GenerateContentConfig.builder()
.cachedContent(cached.name().get())
.build()
);
}

Content systemInstruction = Content.fromParts(
Part.fromText("You are a customer support agent. Be friendly and helpful.")
);
CreateCachedContentConfig config = CreateCachedContentConfig.builder()
.model("gemini-2.0-flash")
.systemInstruction(systemInstruction)
.ttl("86400s") // 24 hours
.build();
CachedContent cached = client.caches.create(config);
// Use for all customer support conversations
Chat chat = client.chats.create(
"gemini-2.0-flash",
GenerateContentConfig.builder()
.cachedContent(cached.name().get())
.build()
);

Install with Tessl CLI
npx tessl i tessl/maven-com-google-genai--google-genai

docs