LangChain4j integration for Google Vertex AI models including chat, language, embedding, image, and scoring capabilities
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
import dev.langchain4j.model.vertexai.VertexAiLanguageModel;
import dev.langchain4j.model.output.Response;
// --- Basic text generation ---------------------------------------------------
// Minimal configuration: endpoint, project, location, publisher, and model
// name. The model is immutable once built; reuse it across calls.
VertexAiLanguageModel model = VertexAiLanguageModel.builder()
        .endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
        .project("your-project-id")
        .location("us-central1")
        .publisher("google")
        .modelName("text-bison@001")
        .build();

Response<String> response = model.generate("Write a short poem about clouds");
String text = response.content();
System.out.println(text);

// --- Generation with tuned sampling parameters --------------------------------
VertexAiLanguageModel model = VertexAiLanguageModel.builder()
        .endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
        .project("your-project-id")
        .location("us-central1")
        .publisher("google")
        .modelName("text-bison@002")
        .temperature(0.8)      // Controls randomness (0.0-1.0)
        .maxOutputTokens(1000) // Maximum response length
        .topK(40)              // Top-K sampling
        .topP(0.95)            // Nucleus sampling
        .maxRetries(5)         // Retry attempts on failure
        .build();

Response<String> response = model.generate("Explain quantum computing in simple terms");
String text = response.content();

// Besides the generated text, Response carries token accounting and the
// reason generation stopped.
TokenUsage tokenUsage = response.tokenUsage();
FinishReason finishReason = response.finishReason();
System.out.println("Generated text: " + text);
System.out.println("Input tokens: " + tokenUsage.inputTokenCount());
System.out.println("Output tokens: " + tokenUsage.outputTokenCount());
System.out.println("Total tokens: " + tokenUsage.totalTokenCount());
System.out.println("Finish reason: " + finishReason);

// --- Batch generation: run several prompts sequentially -----------------------
List<String> prompts = List.of(
        "What is machine learning?",
        "What is deep learning?",
        "What is a neural network?"
);
for (String prompt : prompts) {
    Response<String> response = model.generate(prompt);
    System.out.println("Prompt: " + prompt);
    System.out.println("Response: " + response.content());
    System.out.println();
}

// --- Code generation using a text-block prompt --------------------------------
// Uniform indentation inside the text block is stripped as incidental
// whitespace, so the prompt string is unchanged.
String codePrompt = """
        Write a Java function that calculates the factorial of a number.
        Include error handling for negative numbers.
        """;
Response<String> response = model.generate(codePrompt);
System.out.println(response.content());

// --- Summarization -------------------------------------------------------------
String longText = """
        [Long article or document text here...]
        """;
String summarizationPrompt = "Summarize the following text in 3 sentences:\n\n" + longText;
Response<String> response = model.generate(summarizationPrompt);
System.out.println("Summary: " + response.content());

// --- Question answering grounded in a supplied context --------------------------
String context = """
        The Solar System is the gravitationally bound system of the Sun and the objects
        that orbit it. It formed 4.6 billion years ago from the gravitational collapse of
        a giant interstellar molecular cloud.
        """;
String question = "When did the Solar System form?";
String prompt = "Context: " + context + "\n\nQuestion: " + question + "\n\nAnswer:";
Response<String> response = model.generate(prompt);
System.out.println("Answer: " + response.content());

// --- Deterministic generation: temperature 0.0 ----------------------------------
VertexAiLanguageModel deterministicModel = VertexAiLanguageModel.builder()
        .endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
        .project("your-project-id")
        .location("us-central1")
        .publisher("google")
        .modelName("text-bison@001")
        .temperature(0.0) // Most deterministic
        .build();
// Will produce same output for same prompt
Response<String> response = deterministicModel.generate("List 3 programming languages");

// --- Creative generation: temperature 1.0 ---------------------------------------
VertexAiLanguageModel creativeModel = VertexAiLanguageModel.builder()
        .endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
        .project("your-project-id")
        .location("us-central1")
        .publisher("google")
        .modelName("text-bison@001")
        .temperature(1.0) // Most creative
        .build();
// Will produce varied outputs for same prompt
Response<String> response = creativeModel.generate("Write a creative story opener");

// --- All-args constructor -------------------------------------------------------
// The builder (shown above) is preferred for readability; the positional
// constructor takes every parameter at once.
VertexAiLanguageModel model = new VertexAiLanguageModel(
        "https://us-central1-aiplatform.googleapis.com/v1/", // endpoint
        "your-project-id",                                   // project
        "us-central1",                                       // location
        "google",                                            // publisher
        "text-bison@001",                                    // modelName
        0.7,                                                 // temperature
        500,                                                 // maxOutputTokens
        40,                                                  // topK
        0.95,                                                // topP
        3                                                    // maxRetries
);

// --- Long-context model variant ---------------------------------------------------
VertexAiLanguageModel longContextModel = VertexAiLanguageModel.builder()
        .endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
        .project("your-project-id")
        .location("us-central1")
        .publisher("google")
        .modelName("text-bison-32k") // 32k token context
        .build();
String veryLongPrompt = "..."; // Up to 32k tokens
Response<String> response = longContextModel.generate(veryLongPrompt);

// --- Capping output length --------------------------------------------------------
VertexAiLanguageModel model = VertexAiLanguageModel.builder()
        .endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
        .project("your-project-id")
        .location("us-central1")
        .publisher("google")
        .modelName("text-bison@001")
        .maxOutputTokens(50) // Limit to ~50 tokens
        .build();
Response<String> response = model.generate("Write a long essay about climate change");
// Response will be truncated to ~50 tokens

// --- Error handling ----------------------------------------------------------------
try {
    Response<String> response = model.generate("Some prompt");
    System.out.println(response.content());
} catch (Exception e) {
    System.err.println("Generation failed: " + e.getMessage());
    // The model automatically retries transient errors up to maxRetries,
    // so this catch sees non-retryable errors only.
}

// --- Structured (JSON) output --------------------------------------------------
String structuredPrompt = """
        Generate a JSON object with the following fields:
        - name: a person's name
        - age: an integer
        - hobbies: an array of strings
        Return only valid JSON, no other text.
        """;
Response<String> response = model.generate(structuredPrompt);
String jsonString = response.content();
// Parse the JSON, e.g. with Jackson:
// ObjectMapper mapper = new ObjectMapper();
// Person person = mapper.readValue(jsonString, Person.class);

// --- Concurrent generation with CompletableFuture --------------------------------
import java.util.concurrent.CompletableFuture;
import java.util.List;

List<String> prompts = List.of(
        "What is AI?",
        "What is ML?",
        "What is DL?"
);

// NOTE(review): supplyAsync without an executor uses ForkJoinPool.commonPool();
// generate(...) blocks on network I/O, so pass a dedicated executor for
// anything beyond a handful of prompts.
List<CompletableFuture<Response<String>>> futures = prompts.stream()
        .map(prompt -> CompletableFuture.supplyAsync(() -> model.generate(prompt)))
        .toList();

// join() waits for each result; failures surface as CompletionException.
List<Response<String>> responses = futures.stream()
        .map(CompletableFuture::join)
        .toList();

for (int i = 0; i < responses.size(); i++) {
    System.out.println("Prompt: " + prompts.get(i));
    System.out.println("Response: " + responses.get(i).content());
    System.out.println();
}

// Use lower maxOutputTokens for cost efficiency
// Cost-conscious configuration: a small output cap limits per-call token spend,
// temperature 0.0 yields reproducible answers, and a single retry fails fast
// rather than re-billing transient errors.
VertexAiLanguageModel economicalModel = VertexAiLanguageModel.builder()
.endpoint("https://us-central1-aiplatform.googleapis.com/v1/")
.project("your-project-id")
.location("us-central1")
.publisher("google")
.modelName("text-bison@001")
.maxOutputTokens(200) // Limit output
.temperature(0.0) // Deterministic = fewer retries needed
.maxRetries(1) // Fast failure
.build();

Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-vertex-ai@1.11.0