Multi-module test support framework for Embabel Agent applications providing integration testing, mock AI services, and test configuration utilities
Step-by-step guide for stubbing LLM operations in your tests.
Stubbing allows you to control what the mocked LLM returns in your tests. Instead of making real API calls, you define the response the LLM should return for specific inputs.
The examples below assume a test class built on `EmbabelMockitoIntegrationTest`.
Determine what type of LLM operation your code performs:
| Operation | Use When | Stub Method |
|---|---|---|
| Text generation | LLM returns plain text | whenGenerateText() |
| Object creation | LLM returns structured object | whenCreateObject() |
// For text generation: stub any prompt that contains "keyword"
whenGenerateText(prompt -> prompt.contains("keyword"))
.thenReturn("Mocked response");
// For object creation: stub any prompt that contains "keyword" for the given output class
whenCreateObject(prompt -> prompt.contains("keyword"), OutputClass.class)
.thenReturn(mockedObject);
// Run the code under test
String result = myAgent.process("user input");
The stub intercepts the LLM call and returns your mocked response.
@Test
void testSimpleText() {
    // Arrange: any prompt mentioning "summarize" gets this canned text back
    final String cannedSummary = "This is a summary of the document.";
    whenGenerateText(text -> text.contains("summarize")).thenReturn(cannedSummary);

    // Act: run the agent code that calls the LLM
    String actual = myAgent.summarize(document);

    // Assert: the agent returns the stubbed text unchanged
    assertEquals(cannedSummary, actual);
}

@Test
void testMultipleStubs() {
    // Arrange: one stub per operation, each keyed on a distinct prompt keyword
    whenGenerateText(text -> text.contains("greet")).thenReturn("Hello!");
    whenGenerateText(text -> text.contains("farewell")).thenReturn("Goodbye!");

    // Act: run both operations before checking either result
    String actualGreeting = myAgent.generateGreeting();
    String actualFarewell = myAgent.generateFarewell();

    // Assert: each operation received the response of its own stub
    assertEquals("Hello!", actualGreeting);
    assertEquals("Goodbye!", actualFarewell);
}

@Test
void testComplexPredicate() {
    // Arrange: the stub applies only when all three conditions hold for the prompt
    whenGenerateText(text -> {
        if (!text.contains("analyze")) {
            return false;
        }
        if (text.length() <= 50) {
            return false;
        }
        return text.toLowerCase().contains("data");
    }).thenReturn("Complex analysis result");

    // Act
    String actual = myAgent.analyzeData(largeDataset);

    // Assert
    assertEquals("Complex analysis result", actual);
}

@Test
void testObjectCreation() {
    // Arrange: the object the mocked LLM should hand back
    Person alice = new Person("Alice", 30);
    whenCreateObject(text -> text.contains("extract person"), Person.class)
        .thenReturn(alice);

    // Act: run the extraction code that calls the LLM
    Person extracted = myAgent.extractPerson("Alice is 30 years old");

    // Assert: the stubbed object is what the agent returns
    assertEquals(alice, extracted);
}

@Test
void testMultipleObjects() {
    Person expectedPerson = new Person("Bob", 25);
    Address expectedAddress = new Address("123 Main St");

    // Arrange: one stub per output class, each with its own prompt keyword
    whenCreateObject(prompt -> prompt.contains("person"), Person.class)
        .thenReturn(expectedPerson);
    whenCreateObject(prompt -> prompt.contains("address"), Address.class)
        .thenReturn(expectedAddress);

    // Act
    Person actualPerson = myAgent.extractPerson(text);
    Address actualAddress = myAgent.extractAddress(text);

    // Assert
    assertEquals(expectedPerson, actualPerson);
    assertEquals(expectedAddress, actualAddress);
}

Match on both prompt and model configuration:
@Test
void testWithInteraction() {
    // Stub only when the prompt mentions "precise" AND the interaction uses gpt-4 at temperature 0.0
    whenGenerateText(
        prompt -> prompt.contains("precise"),
        interaction ->
            // Objects.equals is null-safe: an interaction without a model simply does not match,
            // instead of throwing a NullPointerException inside the matcher.
            java.util.Objects.equals(interaction.getModel(), "gpt-4") &&
            // NOTE(review): if getTemperature() returns a boxed Double, a null value would still
            // throw on unboxing here - confirm the return type.
            interaction.getTemperature() == 0.0
    ).thenReturn("Precise result");

    String result = myAgent.preciseModeAnalysis();
    assertEquals("Precise result", result);
}

Return different responses based on prompt content:
@Test
void testConditionalResponses() {
// Stub for positive sentiment
whenGenerateText(p -> p.contains("happy"))
.thenReturn("Positive sentiment detected");
// Stub for negative sentiment
whenGenerateText(p -> p.contains("sad"))
.thenReturn("Negative sentiment detected");
String pos = myAgent.analyzeSentiment("I am happy");
String neg = myAgent.analyzeSentiment("I am sad");
assertEquals("Positive sentiment detected", pos);
assertEquals("Negative sentiment detected", neg);
}Stub to throw exceptions for error testing:
@Test
void testErrorHandling() {
    // Arrange: make the mocked LLM fail for prompts that contain "error"
    RuntimeException outage = new RuntimeException("LLM service unavailable");
    whenGenerateText(text -> text.contains("error")).thenThrow(outage);

    // Assert: calling the agent raises a RuntimeException
    assertThrows(RuntimeException.class, () -> myAgent.processWithError());
}

Use .thenAnswer() for dynamic responses:
@Test
void testDynamicResponse() {
    // Catch-all stub whose answer is computed from the first argument of the mocked call
    whenGenerateText(p -> true).thenAnswer(invocation -> {
        String prompt = invocation.getArgument(0);
        // Echo at most the first 20 characters. Clamping the end index keeps substring()
        // from throwing StringIndexOutOfBoundsException on prompts shorter than 20 characters,
        // which matters because this stub matches every prompt.
        int previewLength = Math.min(20, prompt.length());
        return "You asked about: " + prompt.substring(0, previewLength);
    });
    String result = myAgent.process("long prompt text here");
    assertTrue(result.startsWith("You asked about:"));
}

@Test
fun `test simple stub`() {
    // Kotlin: the predicate is passed as a trailing lambda
    val cannedSummary = "Summary result"
    whenGenerateText { prompt -> prompt.contains("summarize") }
        .thenReturn(cannedSummary)

    val actual = myAgent.summarize(document)

    assertEquals(cannedSummary, actual)
}

@Test
fun `test object creation`() {
    // The predicate is not the last parameter here, so it is passed inside the parentheses
    val expected = Person("Alice", 30)
    whenCreateObject({ prompt -> prompt.contains("extract") }, Person::class.java)
        .thenReturn(expected)

    val actual = myAgent.extractPerson(text)

    assertEquals(expected, actual)
}

// Catch-all stub for any prompt
whenGenerateText(p -> true)
    .thenReturn("Default response");

// Return different responses for multiple calls
whenGenerateText(p -> p.contains("step"))
    .thenReturn("Step 1")
    .thenReturn("Step 2")
    .thenReturn("Step 3");

// Match part of the prompt, ignoring case
whenGenerateText(p -> p.toLowerCase().contains("search"))
    .thenReturn("Search results");

// Use regex for complex matching (here: a yyyy-MM-dd style date anywhere in the prompt).
// (?s) enables DOTALL so '.' also matches line breaks; String.matches() must match the
// whole input, so without it the stub would never apply to a multi-line prompt.
whenGenerateText(p -> p.matches("(?s).*\\d{4}-\\d{2}-\\d{2}.*"))
    .thenReturn("Date-based response");

Problem: Your code doesn't receive the stubbed response.
Solutions:
Debug technique:
whenGenerateText(p -> {
System.out.println("Actual prompt: " + p);
return p.contains("expected");
}).thenReturn("response");Problem: Multiple stubs match the same prompt.
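Solution: Mockito uses the most recently defined stub that matches, so a broad stub registered after a specific one takes over every prompt the specific one would have handled. Keep predicates specific enough that they do not overlap: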
// Bad: Both match
whenGenerateText(prompt -> prompt.contains("text"))
    .thenReturn("A");
whenGenerateText(prompt -> true)
    .thenReturn("B");

// Good: Specific conditions
whenGenerateText(prompt -> prompt.contains("specific text"))
    .thenReturn("A");
whenGenerateText(prompt -> prompt.contains("other text"))
    .thenReturn("B");

Problem: Stub returns wrong type of object.
Solution: Ensure the class in whenCreateObject matches what your code expects:
// Correct
whenCreateObject(prompt -> true, Person.class)
    .thenReturn(person);

// Wrong - will fail if code expects Person
whenCreateObject(prompt -> true, Employee.class)
    .thenReturn(employee);

// Reusable predicates
// Matches any prompt that contains "summarize"; shared so tests do not repeat the lambda
private static final Predicate<String> SUMMARIZE_PROMPT = prompt -> prompt.contains("summarize");
@Test
void testWithReusablePredicate() {
    // Pass the shared predicate constant instead of an inline lambda
    whenGenerateText(SUMMARIZE_PROMPT)
        .thenReturn("Summary");
    // ...
}