tessl/maven-com-google-genai--google-genai

Java idiomatic SDK for the Gemini Developer APIs and Vertex AI APIs

Overview

Eval results

Files

Cached Content

Name: tessl/maven-com-google-genai--google-genai
Author: tessl

Create and manage cached content to optimize repeated requests with the same context, reducing latency and token costs. Caching is useful for frequently used system instructions, large documents, or conversation starters.

Core Imports

import com.google.genai.Caches;
import com.google.genai.AsyncCaches;
import com.google.genai.Pager;
import com.google.genai.types.CachedContent;
import com.google.genai.types.CreateCachedContentConfig;
import com.google.genai.types.UpdateCachedContentConfig;
import com.google.genai.types.DeleteCachedContentResponse;

Caches Service

package com.google.genai;

/**
 * Service for creating, reading, updating, deleting and listing cached content.
 * The usage examples on this page reach it as {@code client.caches}.
 */
public final class Caches {
  /**
   * Creates a cached content resource described by {@code config} and returns it.
   *
   * NOTE(review): confirm this signature against the SDK Javadoc for the version in use; the
   * released SDK may take the model name as a separate leading argument instead of a config field.
   */
  public CachedContent create(CreateCachedContentConfig config);

  /**
   * Returns the cached content identified by the resource name {@code name}.
   * The usage example passes {@code null} for {@code config}.
   */
  public CachedContent get(String name, GetCachedContentConfig config);

  /**
   * Applies {@code config} to the cached content named {@code name} and returns the result.
   * The usage example uses this to set a new TTL.
   */
  public CachedContent update(String name, UpdateCachedContentConfig config);

  /**
   * Deletes the cached content named {@code name}.
   * The usage example passes {@code null} for {@code config}.
   */
  public DeleteCachedContentResponse delete(String name, DeleteCachedContentConfig config);

  /**
   * Returns a {@code Pager} over cached content entries; the usage example walks it with a
   * for-each loop and passes {@code null} for {@code config}.
   */
  public Pager<CachedContent> list(ListCachedContentsConfig config);
}

Cached Content Type

package com.google.genai.types;

/**
 * Value type describing one cached content resource. Every accessor returns an
 * {@code Optional}, so callers must handle the absent case for each field.
 */
public final class CachedContent {
  /** Returns a builder for constructing a {@code CachedContent}. */
  public static Builder builder();

  /** Resource name; the examples pass this value to {@code GenerateContentConfig.cachedContent}. */
  public Optional<String> name();
  /** Human-readable label, set through {@code displayName(...)} when the cache is created. */
  public Optional<String> displayName();
  /** Model identifier associated with the cache (for example {@code "gemini-2.0-flash"}). */
  public Optional<String> model();
  // NOTE(review): the three timestamps are listed as strings here; confirm against the SDK
  // Javadoc, which may expose them as java.time.Instant instead.
  public Optional<String> createTime();
  public Optional<String> updateTime();
  /** Expiry timestamp; the examples print it under the label "Expires". */
  public Optional<String> expireTime();
  /** Content items stored in the cache. */
  public Optional<List<Content>> contents();
  /** Tools stored with the cache. */
  public Optional<List<Tool>> tools();
  /** Tool configuration stored with the cache. */
  public Optional<ToolConfig> toolConfig();
  /** System instruction stored with the cache. */
  public Optional<Content> systemInstruction();
  /** Usage metadata for the cache; the examples read {@code totalTokenCount()} from it. */
  public Optional<CachedContentUsageMetadata> usageMetadata();
}

Create Cached Content

import com.google.common.collect.ImmutableList;
import com.google.genai.types.CachedContent;
import com.google.genai.types.Content;
import com.google.genai.types.CreateCachedContentConfig;
import com.google.genai.types.Part;

// Cache a large document: wrap the reusable text in a Content object first.
Content document = Content.fromParts(
    Part.fromText("Large document content that will be reused...")
);

// Describe the cache: target model, a human-readable label, the content to store,
// and how long the cache should live.
CreateCachedContentConfig config = CreateCachedContentConfig.builder()
    .model("gemini-2.0-flash")
    .displayName("Cached Document")
    .contents(ImmutableList.of(document))
    .ttl("3600s") // Cache for 1 hour
    .build();

// `client` is assumed to be an already-constructed SDK client; its setup is not shown here.
CachedContent cached = client.caches.create(config);

// name() is an Optional, so fall back to a placeholder when it is absent.
System.out.println("Cached content: " + cached.name().orElse("N/A"));

Cache with System Instruction

// The persona text that will be stored in the cache as the system instruction.
String analystPrompt =
    "You are an expert data analyst. Analyze data carefully and provide detailed insights.";
Content analystInstruction = Content.fromParts(Part.fromText(analystPrompt));

// No contents are cached here: the cache carries only the system instruction.
CreateCachedContentConfig analystCacheConfig = CreateCachedContentConfig.builder()
    .displayName("Data Analyst System")
    .model("gemini-2.0-flash")
    .ttl("7200s") // 2 hours
    .systemInstruction(analystInstruction)
    .build();

// `client` is assumed to be an already-constructed SDK client.
CachedContent cached = client.caches.create(analystCacheConfig);

Use Cached Content

// Resolve the cache's resource name up front. name() is an Optional, so fail with a
// descriptive error instead of the bare NoSuchElementException an unchecked get() throws.
String cacheName = cached.name()
    .orElseThrow(() -> new IllegalStateException("Cached content has no name"));

// Reference cached content in generation
GenerateContentConfig genConfig = GenerateContentConfig.builder()
    .cachedContent(cacheName)
    .build();

// The model passed here is the same one the cache was created with in the examples above.
GenerateContentResponse response = client.models.generateContent(
    "gemini-2.0-flash",
    "Based on the cached context, answer this question...",
    genConfig
);

// Check token savings: both counts are Optionals, so default to 0 when absent.
response.usageMetadata().ifPresent(usage -> {
    System.out.println("Prompt tokens: " + usage.promptTokenCount().orElse(0));
    System.out.println("Cached tokens: " + usage.cachedContentTokenCount().orElse(0));
});

Update Cached Content

// `cachedName` is assumed to hold the resource name of an existing cache
// (for example, the value returned by cached.name()).
// Only the TTL is set on the update config.
CachedContent updated = client.caches.update(
    cachedName,
    UpdateCachedContentConfig.builder()
        .ttl("7200s") // Extend to 2 hours
        .build()
);

Get Cached Content

// Look up an existing cache by its resource name; no get-config is passed (null).
CachedContent cached = client.caches.get(cachedName, null);

// Every field is an Optional, so substitute a placeholder for anything absent.
String name = cached.name().orElse("N/A");
String displayName = cached.displayName().orElse("N/A");
String model = cached.model().orElse("N/A");
String expires = cached.expireTime().orElse("N/A");

System.out.println("Name: " + name);
System.out.println("Display name: " + displayName);
System.out.println("Model: " + model);
System.out.println("Expires: " + expires);

// Printed only when usage metadata is present; a missing token count is reported as 0.
cached.usageMetadata()
    .map(usage -> usage.totalTokenCount().orElse(0))
    .ifPresent(totalTokens -> System.out.println("Total tokens: " + totalTokens));

List Cached Content

// No list-config is passed (null).
Pager<CachedContent> allCaches = client.caches.list(null);

// Visit each cached content entry the pager yields and print a short summary of it.
allCaches.forEach(entry -> {
    System.out.println("Cache: " + entry.displayName().orElse("N/A"));
    System.out.println("  Model: " + entry.model().orElse("N/A"));
    System.out.println("  Expires: " + entry.expireTime().orElse("N/A"));
});

Delete Cached Content

// Delete the cache identified by `cachedName`; no delete-config is passed (null).
// The returned response object is not inspected in this example.
DeleteCachedContentResponse deleteResult = client.caches.delete(cachedName, null);
System.out.println("Cached content deleted");

Cache Expiration

import java.time.Duration;
import java.time.Instant;
import java.time.temporal.ChronoUnit;

// `contents` is assumed to be the list of Content to cache, built as in the creation examples.

// Set expiration time: an absolute UTC timestamp. It is computed relative to now so the
// example never carries a fixed date that has already passed. Truncating to seconds
// yields the form "yyyy-MM-ddTHH:mm:ssZ".
String expiresAt = Instant.now()
    .plus(Duration.ofDays(1))
    .truncatedTo(ChronoUnit.SECONDS)
    .toString();

CreateCachedContentConfig config1 = CreateCachedContentConfig.builder()
    .model("gemini-2.0-flash")
    .contents(contents)
    .expireTime(expiresAt) // Specific time (here: 24 hours from now)
    .build();

// Set TTL (time to live): a duration counted from creation instead of an absolute time.
CreateCachedContentConfig config2 = CreateCachedContentConfig.builder()
    .model("gemini-2.0-flash")
    .contents(contents)
    .ttl("3600s") // 1 hour from creation
    .build();

Best Practices

Cache Large Documents

// Upload large file. `File` here is the type returned by client.files.upload (it exposes
// Optional-returning uri() and mimeType()), not java.io.File.
File file = client.files.upload("large-document.pdf", null);

// uri() and mimeType() are Optionals; fail with a descriptive error rather than an
// unchecked get() if the upload result lacks either value.
String fileUri = file.uri()
    .orElseThrow(() -> new IllegalStateException("Uploaded file has no URI"));
String fileMimeType = file.mimeType()
    .orElseThrow(() -> new IllegalStateException("Uploaded file has no MIME type"));

// Cache file reference
Content fileContent = Content.fromParts(
    Part.fromUri(fileUri, fileMimeType)
);

CreateCachedContentConfig config = CreateCachedContentConfig.builder()
    .model("gemini-2.0-flash")
    .contents(ImmutableList.of(fileContent))
    .ttl("3600s")
    .build();

CachedContent cached = client.caches.create(config);

String cacheName = cached.name()
    .orElseThrow(() -> new IllegalStateException("Cached content has no name"));

// The generation config is identical for every query, so build it once outside the loop.
GenerateContentConfig cachedConfig = GenerateContentConfig.builder()
    .cachedContent(cacheName)
    .build();

// Use cached content multiple times.
// `queries` is assumed to be the caller's collection of prompt strings.
for (String query : queries) {
    GenerateContentResponse response = client.models.generateContent(
        "gemini-2.0-flash",
        query,
        cachedConfig
    );
    // Consume `response` here.
}

Cache System Instructions

// The instruction text to store in the cache.
Content systemInstruction = Content.fromParts(
    Part.fromText("You are a customer support agent. Be friendly and helpful.")
);

CreateCachedContentConfig config = CreateCachedContentConfig.builder()
    .model("gemini-2.0-flash")
    .systemInstruction(systemInstruction)
    .ttl("86400s") // 24 hours
    .build();

CachedContent cached = client.caches.create(config);

// name() is an Optional; fail with a descriptive error instead of an unchecked get()
// if the created cache comes back without a name.
String cacheName = cached.name()
    .orElseThrow(() -> new IllegalStateException("Cached content has no name"));

// Use for all customer support conversations
Chat chat = client.chats.create(
    "gemini-2.0-flash",
    GenerateContentConfig.builder()
        .cachedContent(cacheName)
        .build()
);

Install with Tessl CLI

npx tessl i tessl/maven-com-google-genai--google-genai

docs

batch-operations.md

caching.md

chat-sessions.md

client-configuration.md

content-generation.md

embeddings-tokens.md

error-handling.md

file-search-stores.md