CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-dev-langchain4j--langchain4j-vertex-ai

LangChain4j integration for Google Vertex AI models including chat, language, embedding, image, and scoring capabilities

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/models/language/examples.md

Language Model Examples

Basic Text Generation

import dev.langchain4j.model.vertexai.VertexAiLanguageModel;
import dev.langchain4j.model.output.Response;

// Minimal configuration: endpoint, project, location, publisher, and model name
// are all required by the builder.
VertexAiLanguageModel model = VertexAiLanguageModel.builder()
    .endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
    .project("your-project-id")
    .location("us-central1")
    .publisher("google")
    .modelName("text-bison@001")
    .build();

// generate() wraps the completion in a Response; content() returns the text itself.
Response<String> response = model.generate("Write a short poem about clouds");
String text = response.content();
System.out.println(text);

With Generation Parameters

// All sampling/limit parameters are optional builder settings; unset ones fall
// back to the model's defaults.
VertexAiLanguageModel model = VertexAiLanguageModel.builder()
    .endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
    .project("your-project-id")
    .location("us-central1")
    .publisher("google")
    .modelName("text-bison@002")
    .temperature(0.8)              // Controls randomness (0.0-1.0)
    .maxOutputTokens(1000)         // Maximum response length
    .topK(40)                      // Top-K sampling
    .topP(0.95)                    // Nucleus sampling
    .maxRetries(5)                 // Retry attempts on failure
    .build();

Response Metadata

Response<String> response = model.generate("Explain quantum computing in simple terms");

// Besides the text, the Response carries token accounting and the reason
// generation stopped (e.g. natural stop vs. token limit).
String text = response.content();
TokenUsage tokenUsage = response.tokenUsage();
FinishReason finishReason = response.finishReason();

System.out.println("Generated text: " + text);
System.out.println("Input tokens: " + tokenUsage.inputTokenCount());
System.out.println("Output tokens: " + tokenUsage.outputTokenCount());
System.out.println("Total tokens: " + tokenUsage.totalTokenCount());
System.out.println("Finish reason: " + finishReason);

Batch Generation

// Run several independent prompts sequentially against the same model instance.
List<String> prompts = List.of(
    "What is machine learning?",
    "What is deep learning?",
    "What is a neural network?"
);

prompts.forEach(prompt -> {
    Response<String> answer = model.generate(prompt);
    System.out.println("Prompt: " + prompt);
    System.out.println("Response: " + answer.content());
    System.out.println();
});

Code Generation

// Text blocks keep multi-line prompts readable; the model is steered purely by
// the prompt wording here — no special code-generation mode is used.
String codePrompt = """
    Write a Java function that calculates the factorial of a number.
    Include error handling for negative numbers.
    """;

Response<String> response = model.generate(codePrompt);
System.out.println(response.content());

Text Summarization

String longText = """
    [Long article or document text here...]
    """;

// Summarization is plain prompt engineering: instruction first, source text after.
String summarizationPrompt = "Summarize the following text in 3 sentences:\n\n" + longText;

Response<String> response = model.generate(summarizationPrompt);
System.out.println("Summary: " + response.content());

Question Answering

// Grounded Q&A: supply the reference passage in the prompt so the model answers
// from the given context rather than from its training data alone.
String context = """
    The Solar System is the gravitationally bound system of the Sun and the objects
    that orbit it. It formed 4.6 billion years ago from the gravitational collapse of
    a giant interstellar molecular cloud.
    """;

String question = "When did the Solar System form?";
String prompt = "Context: " + context + "\n\nQuestion: " + question + "\n\nAnswer:";

Response<String> response = model.generate(prompt);
System.out.println("Answer: " + response.content());

Temperature Effects

Deterministic (temperature = 0.0)

// temperature 0.0 minimizes sampling randomness — useful for reproducible
// outputs such as classification or extraction tasks.
VertexAiLanguageModel deterministicModel = VertexAiLanguageModel.builder()
    .endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
    .project("your-project-id")
    .location("us-central1")
    .publisher("google")
    .modelName("text-bison@001")
    .temperature(0.0)  // Most deterministic
    .build();

// Will produce same output for same prompt
Response<String> response = deterministicModel.generate("List 3 programming languages");

Creative (temperature = 1.0)

// temperature 1.0 maximizes sampling randomness — suited to brainstorming and
// creative-writing prompts where variety is desirable.
VertexAiLanguageModel creativeModel = VertexAiLanguageModel.builder()
    .endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
    .project("your-project-id")
    .location("us-central1")
    .publisher("google")
    .modelName("text-bison@001")
    .temperature(1.0)  // Most creative
    .build();

// Will produce varied outputs for same prompt
Response<String> response = creativeModel.generate("Write a creative story opener");

Using Direct Constructor (Legacy)

// Positional constructor retained for backward compatibility. Prefer the builder:
// it names each argument and lets you omit optional settings, whereas this form
// requires every parameter in exactly this order.
VertexAiLanguageModel model = new VertexAiLanguageModel(
    "https://us-central1-aiplatform.googleapis.com/v1/",  // endpoint
    "your-project-id",                                    // project
    "us-central1",                                        // location
    "google",                                             // publisher
    "text-bison@001",                                     // modelName
    0.7,                                                  // temperature
    500,                                                  // maxOutputTokens
    40,                                                   // topK
    0.95,                                                 // topP
    3                                                     // maxRetries
);

Extended Context Model

// The -32k model variant accepts much longer prompts; selection is purely via
// the modelName string — no other configuration changes are needed.
VertexAiLanguageModel longContextModel = VertexAiLanguageModel.builder()
    .endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
    .project("your-project-id")
    .location("us-central1")
    .publisher("google")
    .modelName("text-bison-32k")  // 32k token context
    .build();

String veryLongPrompt = "..."; // Up to 32k tokens
Response<String> response = longContextModel.generate(veryLongPrompt);

Token Limiting

// maxOutputTokens caps the generated length; generation is cut off once the
// limit is reached (check Response.finishReason() to detect truncation).
VertexAiLanguageModel model = VertexAiLanguageModel.builder()
    .endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
    .project("your-project-id")
    .location("us-central1")
    .publisher("google")
    .modelName("text-bison@001")
    .maxOutputTokens(50)  // Limit to ~50 tokens
    .build();

Response<String> response = model.generate("Write a long essay about climate change");
// Response will be truncated to ~50 tokens

Error Handling

// Broad catch is deliberate here: this is the application boundary, and the
// model has already exhausted its internal retries before the exception escapes.
try {
    Response<String> response = model.generate("Some prompt");
    System.out.println(response.content());
} catch (Exception e) {
    System.err.println("Generation failed: " + e.getMessage());
    // Model automatically retries transient errors up to maxRetries
    // This catches non-retryable errors only
}

Structured Output Generation

// Structured output via prompting only — the model is asked (not forced) to emit
// JSON, so downstream parsing should still handle malformed responses.
String structuredPrompt = """
    Generate a JSON object with the following fields:
    - name: a person's name
    - age: an integer
    - hobbies: an array of strings

    Return only valid JSON, no other text.
    """;

Response<String> response = model.generate(structuredPrompt);
String jsonString = response.content();

// Parse JSON
// ObjectMapper mapper = new ObjectMapper();
// Person person = mapper.readValue(jsonString, Person.class);

Parallel Generation

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.List;

List<String> prompts = List.of(
    "What is AI?",
    "What is ML?",
    "What is DL?"
);

// model.generate(...) is blocking network I/O, so supply an explicit executor:
// CompletableFuture.supplyAsync's default is the shared ForkJoinPool.commonPool(),
// which is sized for CPU-bound work and can be starved by blocking calls.
ExecutorService executor = Executors.newFixedThreadPool(prompts.size());
try {
    List<CompletableFuture<Response<String>>> futures = prompts.stream()
        .map(prompt -> CompletableFuture.supplyAsync(() -> model.generate(prompt), executor))
        .toList();

    // join() blocks until each future completes; a failed generation surfaces
    // here as a CompletionException wrapping the original cause.
    List<Response<String>> responses = futures.stream()
        .map(CompletableFuture::join)
        .toList();

    for (int i = 0; i < responses.size(); i++) {
        System.out.println("Prompt: " + prompts.get(i));
        System.out.println("Response: " + responses.get(i).content());
        System.out.println();
    }
} finally {
    // Always release the pool's threads, even if a generation failed.
    executor.shutdown();
}

Cost Optimization

// Use lower maxOutputTokens for cost efficiency
// (billing scales with token counts, so capping output and avoiding retries
// keeps per-request cost predictable).
VertexAiLanguageModel economicalModel = VertexAiLanguageModel.builder()
    .endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
    .project("your-project-id")
    .location("us-central1")
    .publisher("google")
    .modelName("text-bison@001")
    .maxOutputTokens(200)     // Limit output
    .temperature(0.0)          // Deterministic = fewer retries needed
    .maxRetries(1)             // Fast failure
    .build();

Install with Tessl CLI

npx tessl i tessl/maven-dev-langchain4j--langchain4j-vertex-ai@1.11.0

docs

index.md

quick-reference.md

tile.json