Base starter module for the Embabel Agent Framework. It provides the core dependencies for building agentic flows on the JVM, with Spring Boot integration and GOAP-based intelligent path finding.
This document is a step-by-step guide to using multimodal content (text, images, audio, and video) in agent actions.
import com.embabel.agent.api.MultimodalContent;
import com.embabel.agent.api.annotation.Action;
// Example (Java): simplest case — a single text part sent to the LLM.
@Agent(description = "Text processing agent")
public class TextAgent {
// Wraps the raw input string as multimodal text content and asks the
// Gemini model for a structured Result. Ai is injected via @Provided.
@Action(description = "Process text input")
public Result processText(String input, @Provided Ai ai) {
// Create text content
MultimodalContent textContent = MultimodalContent.text(input);
// Process with LLM
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(List.of(textContent));
// NOTE(review): snippet uses List.of but shows no java.util.List import — presumably elided.
}
}import com.embabel.agent.api.MultimodalContent
import com.embabel.agent.api.annotation.Action
// Example (Kotlin): equivalent of the Java TextAgent above.
@Agent(description = "Text processor")
class TextAgent {
// Wraps input as a single text part and requests a Result from the model.
@Action(description = "Process text")
fun processText(input: String, @Provided ai: Ai): Result {
val textContent = MultimodalContent.text(input)
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(listOf(textContent))
}
}import com.embabel.agent.api.MultimodalContent;
// Example (Java): image inputs — single-image analysis and two-image comparison.
@Agent(description = "Image analysis agent")
public class ImageAnalysisAgent {
// Sends one image plus an instruction prompt; MIME type is hard-coded to
// JPEG here — real callers should pass the actual type.
@Action(description = "Analyze image")
public Analysis analyzeImage(String imageUrl, @Provided Ai ai) {
// Create image content
MultimodalContent image = MultimodalContent.image(
imageUrl,
"image/jpeg"
);
MultimodalContent prompt = MultimodalContent.text(
"Analyze this image and describe what you see"
);
// Process with multimodal LLM
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(List.of(prompt, image));
}
// Interleaves text and image parts so the model sees the two images in
// the intended order within one prompt.
@Action(description = "Compare two images")
public Comparison compareImages(
String imageUrl1,
String imageUrl2,
@Provided Ai ai
) {
List<MultimodalContent> contents = List.of(
MultimodalContent.text("Compare these two images:"),
MultimodalContent.image(imageUrl1, "image/jpeg"),
MultimodalContent.text("and"),
MultimodalContent.image(imageUrl2, "image/jpeg")
);
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents);
}
}import com.embabel.agent.api.MultimodalContent
// Example (Kotlin): image analysis and OCR-style text extraction.
@Agent(description = "Vision analyzer")
class VisionAgent {
// One text prompt + one JPEG image part → structured Analysis.
@Action(description = "Analyze image")
fun analyzeImage(imageUrl: String, @Provided ai: Ai): Analysis {
val contents = listOf(
MultimodalContent.text("Analyze this image:"),
MultimodalContent.image(imageUrl, "image/jpeg")
)
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents)
}
// Same pattern, but the prompt asks for text extraction (PNG assumed).
@Action(description = "Extract text from image")
fun extractText(imageUrl: String, @Provided ai: Ai): String {
val contents = listOf(
MultimodalContent.text("Extract all text from this image:"),
MultimodalContent.image(imageUrl, "image/png")
)
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents)
}
}import com.embabel.agent.api.MultimodalContent;
// Example (Java): audio inputs — transcription and sentiment analysis.
@Agent(description = "Audio processing agent")
public class AudioAgent {
// Sends one MP3 audio part plus a transcription instruction.
@Action(description = "Transcribe audio")
public Transcript transcribeAudio(String audioUrl, @Provided Ai ai) {
// Create audio content
MultimodalContent audio = MultimodalContent.audio(
audioUrl,
"audio/mpeg"
);
MultimodalContent prompt = MultimodalContent.text(
"Transcribe this audio"
);
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(List.of(prompt, audio));
}
// Same shape with a WAV clip and a sentiment-analysis instruction.
@Action(description = "Analyze audio sentiment")
public SentimentAnalysis analyzeSentiment(
String audioUrl,
@Provided Ai ai
) {
List<MultimodalContent> contents = List.of(
MultimodalContent.text("Analyze the sentiment of this audio:"),
MultimodalContent.audio(audioUrl, "audio/wav")
);
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents);
}
}import com.embabel.agent.api.MultimodalContent
// Example (Kotlin): audio transcription and summarization.
@Agent(description = "Audio analyzer")
class AudioAnalyzerAgent {
// Transcribes an MP3 clip into a structured Transcript.
@Action(description = "Transcribe audio")
fun transcribe(audioUrl: String, @Provided ai: Ai): Transcript {
val contents = listOf(
MultimodalContent.text("Transcribe this audio:"),
MultimodalContent.audio(audioUrl, "audio/mpeg")
)
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents)
}
// Summarizes a WAV clip; only the prompt and MIME type differ.
@Action(description = "Summarize audio content")
fun summarizeAudio(audioUrl: String, @Provided ai: Ai): Summary {
val contents = listOf(
MultimodalContent.text("Summarize the key points from this audio:"),
MultimodalContent.audio(audioUrl, "audio/wav")
)
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents)
}
}import com.embabel.agent.api.MultimodalContent
// Example (Java): video inputs — whole-video analysis and key-frame extraction.
@Agent(description = "Video processing agent")
public class VideoAgent {
// Sends one MP4 video part plus an analysis instruction.
@Action(description = "Analyze video")
public VideoAnalysis analyzeVideo(String videoUrl, @Provided Ai ai) {
// Create video content
MultimodalContent video = MultimodalContent.video(
videoUrl,
"video/mp4"
);
MultimodalContent prompt = MultimodalContent.text(
"Analyze this video and describe the key events"
);
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(List.of(prompt, video));
}
// Asks the model to identify and describe key frames; the "extraction"
// happens inside the LLM, not via local video processing.
@Action(description = "Extract frames from video")
public List<Frame> extractFrames(String videoUrl, @Provided Ai ai) {
List<MultimodalContent> contents = List.of(
MultimodalContent.text("Extract and describe key frames from this video:"),
MultimodalContent.video(videoUrl, "video/mp4")
);
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents);
}
}import com.embabel.agent.api.MultimodalContent
// Example (Kotlin): video analysis and summarization (MP4 and WebM).
@Agent(description = "Video analyzer")
class VideoAnalyzerAgent {
// One prompt + one MP4 part → structured VideoAnalysis.
@Action(description = "Analyze video content")
fun analyzeVideo(videoUrl: String, @Provided ai: Ai): VideoAnalysis {
val contents = listOf(
MultimodalContent.text("Analyze this video:"),
MultimodalContent.video(videoUrl, "video/mp4")
)
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents)
}
// Summary variant, demonstrating a different container (WebM).
@Action(description = "Generate video summary")
fun summarizeVideo(videoUrl: String, @Provided ai: Ai): Summary {
val contents = listOf(
MultimodalContent.text("Provide a detailed summary of this video:"),
MultimodalContent.video(videoUrl, "video/webm")
)
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents)
}
}import com.embabel.agent.api.MultimodalContent;
// Example (Java): document files as multimodal parts (PDF, Excel).
@Agent(description = "File processor")
public class FileProcessorAgent {
// Attaches a PDF by path and asks the model to extract key information.
@Action(description = "Process PDF document")
public DocumentAnalysis processPdf(String filePath, @Provided Ai ai) {
// Create file content
MultimodalContent pdfFile = MultimodalContent.file(
filePath,
"application/pdf"
);
MultimodalContent prompt = MultimodalContent.text(
"Analyze this PDF document and extract key information"
);
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(List.of(prompt, pdfFile));
}
// Same pattern for a legacy Excel spreadsheet (application/vnd.ms-excel).
@Action(description = "Process spreadsheet")
public DataAnalysis processSpreadsheet(String filePath, @Provided Ai ai) {
MultimodalContent spreadsheet = MultimodalContent.file(
filePath,
"application/vnd.ms-excel"
);
MultimodalContent prompt = MultimodalContent.text(
"Analyze the data in this spreadsheet"
);
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(List.of(prompt, spreadsheet));
}
}import com.embabel.agent.api.MultimodalContent
// Example (Kotlin): PDF and Word (DOCX) document processing.
@Agent(description = "Document processor")
class DocumentProcessorAgent {
// Attaches a PDF and asks for structured data extraction.
@Action(description = "Process PDF")
fun processPdf(filePath: String, @Provided ai: Ai): DocumentAnalysis {
val contents = listOf(
MultimodalContent.text("Extract structured data from this PDF:"),
MultimodalContent.file(filePath, "application/pdf")
)
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents)
}
// DOCX uses the long OpenXML MIME type shown below.
@Action(description = "Process Word document")
fun processWord(filePath: String, @Provided ai: Ai): Analysis {
val contents = listOf(
MultimodalContent.text("Summarize this document:"),
MultimodalContent.file(filePath, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
)
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents)
}
}import com.embabel.agent.api.MultimodalContent;
// Example (Java): mixing several content types in one request, and building
// a variable-length content list from a collection.
@Agent(description = "Multimodal content processor")
public class MultimodalAgent {
// Fixed-shape prompt: framing text, user text, one image, one audio clip,
// and a closing instruction — order is the order the model receives them.
@Action(description = "Process mixed content")
public Report processMultimodal(
String text,
String imageUrl,
String audioUrl,
@Provided Ai ai
) {
// Combine multiple content types
List<MultimodalContent> contents = List.of(
MultimodalContent.text("Analyze the following content:"),
MultimodalContent.text("Text: " + text),
MultimodalContent.image(imageUrl, "image/jpeg"),
MultimodalContent.text("Audio description:"),
MultimodalContent.audio(audioUrl, "audio/mpeg"),
MultimodalContent.text("Provide a comprehensive analysis.")
);
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents);
}
// Mutable list variant for a variable number of images plus narration.
// NOTE(review): uses ArrayList without a visible java.util import — presumably elided.
@Action(description = "Create multimedia report")
public MultimediaReport createReport(
List<String> imagePaths,
String narrationUrl,
@Provided Ai ai
) {
List<MultimodalContent> contents = new ArrayList<>();
contents.add(MultimodalContent.text(
"Create a multimedia report from these materials:"
));
// Add all images
for (String imagePath : imagePaths) {
contents.add(MultimodalContent.image(imagePath, "image/png"));
}
// Add narration
contents.add(MultimodalContent.text("With narration:"));
contents.add(MultimodalContent.audio(narrationUrl, "audio/mpeg"));
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents);
}
}import com.embabel.agent.api.MultimodalContent
// Example (Kotlin): assembling heterogeneous content lists for a single
// model call — free text, a variable number of images, audio, and slides.
@Agent(description = "Rich content processor")
class RichContentAgent {

    // Combines context text, every image URL, and one audio clip into a
    // single ordered prompt; returns the model's structured Analysis.
    @Action(description = "Process mixed media")
    fun processMixedMedia(
        text: String,
        images: List<String>,
        audioUrl: String,
        @Provided ai: Ai
    ): Analysis {
        val parts = mutableListOf<MultimodalContent>()
        parts += MultimodalContent.text("Analyze this mixed media content:")
        parts += MultimodalContent.text("Context: $text")
        for (imageUrl in images) {
            parts += MultimodalContent.image(imageUrl, "image/jpeg")
        }
        parts += MultimodalContent.text("Audio commentary:")
        parts += MultimodalContent.audio(audioUrl, "audio/mpeg")
        return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
            .createObject(parts)
    }

    // Emits three parts per slide (caption, image, notes) after an opening
    // instruction, then asks the model to build the Presentation.
    @Action(description = "Create presentation from materials")
    fun createPresentation(
        slides: List<SlideData>,
        @Provided ai: Ai
    ): Presentation {
        val parts = buildList {
            add(MultimodalContent.text(
                "Create a presentation from these slides:"
            ))
            for (slide in slides) {
                add(MultimodalContent.text("Slide ${slide.number}:"))
                add(MultimodalContent.image(slide.imageUrl, "image/png"))
                add(MultimodalContent.text("Notes: ${slide.notes}"))
            }
        }
        return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
            .createObject(parts)
    }
}
// Per-slide input for RichContentAgent.createPresentation: ordinal position,
// rendered image URL, and speaker notes.
data class SlideData(
val number: Int,
val imageUrl: String,
val notes: String
)@Agent(description = "Image processor with various formats")
// Java approach to format handling: one @Action per image format, all
// delegating to a shared private helper. Contrast with the Kotlin enum
// version that follows.
public class ImageFormatAgent {
@Action(description = "Process JPEG image")
public Result processJpeg(String url, @Provided Ai ai) {
MultimodalContent image = MultimodalContent.image(url, "image/jpeg");
return processImage(image, ai);
}
@Action(description = "Process PNG image")
public Result processPng(String url, @Provided Ai ai) {
MultimodalContent image = MultimodalContent.image(url, "image/png");
return processImage(image, ai);
}
@Action(description = "Process WebP image")
public Result processWebP(String url, @Provided Ai ai) {
MultimodalContent image = MultimodalContent.image(url, "image/webp");
return processImage(image, ai);
}
@Action(description = "Process GIF image")
public Result processGif(String url, @Provided Ai ai) {
MultimodalContent image = MultimodalContent.image(url, "image/gif");
return processImage(image, ai);
}
// Shared analysis call used by every format-specific action above.
private Result processImage(MultimodalContent image, Ai ai) {
List<MultimodalContent> contents = List.of(
MultimodalContent.text("Analyze this image:"),
image
);
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents);
}
}@Agent(description = "Format-aware image processor")
// Kotlin approach: one entry point keyed by an ImageFormat enum, with an
// exhaustive when mapping each format to its MIME type.
class ImageFormatAgent {

    // Resolves the MIME type for the format and asks the model for a Result.
    @Action(description = "Process image with auto-detect format")
    fun processImage(url: String, format: ImageFormat, @Provided ai: Ai): Result {
        val mime: String = when (format) {
            ImageFormat.JPEG -> "image/jpeg"
            ImageFormat.PNG -> "image/png"
            ImageFormat.WEBP -> "image/webp"
            ImageFormat.GIF -> "image/gif"
            ImageFormat.BMP -> "image/bmp"
            ImageFormat.TIFF -> "image/tiff"
        }
        val prompt = MultimodalContent.text("Analyze this $format image:")
        val picture = MultimodalContent.image(url, mime)
        return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
            .createObject(listOf(prompt, picture))
    }
}
// Image formats accepted by ImageFormatAgent; each maps to one MIME type.
enum class ImageFormat {
JPEG, PNG, WEBP, GIF, BMP, TIFF
}@Agent(description = "Audio processor with various formats")
// Java approach: one @Action per audio format, delegating to a shared
// transcription helper. Contrast with the Kotlin enum version below.
public class AudioFormatAgent {
@Action(description = "Process MP3 audio")
public Transcript processMp3(String url, @Provided Ai ai) {
MultimodalContent audio = MultimodalContent.audio(url, "audio/mpeg");
return transcribeAudio(audio, ai);
}
@Action(description = "Process WAV audio")
public Transcript processWav(String url, @Provided Ai ai) {
MultimodalContent audio = MultimodalContent.audio(url, "audio/wav");
return transcribeAudio(audio, ai);
}
@Action(description = "Process OGG audio")
public Transcript processOgg(String url, @Provided Ai ai) {
MultimodalContent audio = MultimodalContent.audio(url, "audio/ogg");
return transcribeAudio(audio, ai);
}
@Action(description = "Process FLAC audio")
public Transcript processFlac(String url, @Provided Ai ai) {
MultimodalContent audio = MultimodalContent.audio(url, "audio/flac");
return transcribeAudio(audio, ai);
}
// Shared transcription call used by every format-specific action above.
private Transcript transcribeAudio(MultimodalContent audio, Ai ai) {
List<MultimodalContent> contents = List.of(
MultimodalContent.text("Transcribe this audio:"),
audio
);
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents);
}
}@Agent(description = "Format-aware audio processor")
// Kotlin approach: single transcription entry point keyed by an AudioFormat
// enum, with an exhaustive when for the MIME mapping.
class AudioFormatAgent {

    // Maps the format to its MIME type, then requests a Transcript.
    @Action(description = "Process audio with format")
    fun processAudio(url: String, format: AudioFormat, @Provided ai: Ai): Transcript {
        val mime: String = when (format) {
            AudioFormat.MP3 -> "audio/mpeg"
            AudioFormat.WAV -> "audio/wav"
            AudioFormat.OGG -> "audio/ogg"
            AudioFormat.FLAC -> "audio/flac"
            AudioFormat.AAC -> "audio/aac"
            AudioFormat.M4A -> "audio/mp4"
        }
        val prompt = MultimodalContent.text("Transcribe this $format audio:")
        val clip = MultimodalContent.audio(url, mime)
        return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
            .createObject(listOf(prompt, clip))
    }
}
// Audio formats accepted by AudioFormatAgent; M4A maps to audio/mp4.
enum class AudioFormat {
MP3, WAV, OGG, FLAC, AAC, M4A
}@Agent(description = "Video processor with various formats")
// Java approach: one @Action per video container format, delegating to a
// shared analysis helper. Contrast with the Kotlin enum version below.
public class VideoFormatAgent {
@Action(description = "Process MP4 video")
public VideoAnalysis processMp4(String url, @Provided Ai ai) {
MultimodalContent video = MultimodalContent.video(url, "video/mp4");
return analyzeVideo(video, ai);
}
@Action(description = "Process WebM video")
public VideoAnalysis processWebM(String url, @Provided Ai ai) {
MultimodalContent video = MultimodalContent.video(url, "video/webm");
return analyzeVideo(video, ai);
}
@Action(description = "Process MOV video")
public VideoAnalysis processMov(String url, @Provided Ai ai) {
MultimodalContent video = MultimodalContent.video(url, "video/quicktime");
return analyzeVideo(video, ai);
}
@Action(description = "Process AVI video")
public VideoAnalysis processAvi(String url, @Provided Ai ai) {
MultimodalContent video = MultimodalContent.video(url, "video/x-msvideo");
return analyzeVideo(video, ai);
}
// Shared analysis call used by every format-specific action above.
private VideoAnalysis analyzeVideo(MultimodalContent video, Ai ai) {
List<MultimodalContent> contents = List.of(
MultimodalContent.text("Analyze this video:"),
video
);
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents);
}
}@Agent(description = "Format-aware video processor")
// Kotlin approach: single analysis entry point keyed by a VideoFormat enum,
// with an exhaustive when for the MIME mapping.
class VideoFormatAgent {

    // Resolves the container's MIME type and requests a VideoAnalysis.
    @Action(description = "Process video with format")
    fun processVideo(url: String, format: VideoFormat, @Provided ai: Ai): VideoAnalysis {
        val mime: String = when (format) {
            VideoFormat.MP4 -> "video/mp4"
            VideoFormat.WEBM -> "video/webm"
            VideoFormat.MOV -> "video/quicktime"
            VideoFormat.AVI -> "video/x-msvideo"
            VideoFormat.MKV -> "video/x-matroska"
            VideoFormat.FLV -> "video/x-flv"
        }
        val prompt = MultimodalContent.text("Analyze this $format video:")
        val clip = MultimodalContent.video(url, mime)
        return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
            .createObject(listOf(prompt, clip))
    }
}
// Video container formats accepted by VideoFormatAgent.
enum class VideoFormat {
MP4, WEBM, MOV, AVI, MKV, FLV
}import com.embabel.agent.api.*;
import com.embabel.agent.api.annotation.*;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.*;
/**
 * GOAP-planned agent that uploads a batch of media files, routes each
 * modality (image / audio / video) through its own analysis action, and
 * synthesizes a single MultimediaReport from the intermediate results.
 * The pre/post condition strings let the GOAP planner order the actions.
 */
@Agent(
    description = "Comprehensive multimedia content analyzer",
    planner = PlannerType.GOAP
)
public class MultimediaAnalyzerAgent {

    private final StorageService storage;
    // NOTE(review): transcription is injected but never used in this class —
    // confirm whether it is dead weight or required elsewhere.
    private final TranscriptionService transcription;

    public MultimediaAnalyzerAgent(
            StorageService storage,
            TranscriptionService transcription
    ) {
        this.storage = storage;
        this.transcription = transcription;
    }

    /**
     * Uploads every file and buckets the resulting URL by detected MIME type.
     * FIX: the original called Files.probeContentType() without handling its
     * checked IOException (a compile error) or its possible null return
     * (an NPE on startsWith). Files whose type cannot be determined are now
     * skipped instead of crashing the action.
     */
    @Action(
        description = "Upload and prepare media files",
        post = {"mediaUploaded"},
        outputBinding = "mediaUrls"
    )
    public MediaUrls uploadMedia(
            List<File> files,
            @Provided ActionContext context
    ) {
        context.updateProgress("Uploading " + files.size() + " files");
        List<String> imageUrls = new ArrayList<>();
        List<String> audioUrls = new ArrayList<>();
        List<String> videoUrls = new ArrayList<>();
        for (File file : files) {
            String url = storage.upload(file);
            String mimeType;
            try {
                mimeType = Files.probeContentType(file.toPath());
            } catch (IOException e) {
                mimeType = null;
            }
            if (mimeType == null) {
                continue; // unknown type: no analyzer can handle it
            }
            if (mimeType.startsWith("image/")) {
                imageUrls.add(url);
            } else if (mimeType.startsWith("audio/")) {
                audioUrls.add(url);
            } else if (mimeType.startsWith("video/")) {
                videoUrls.add(url);
            }
        }
        return new MediaUrls(imageUrls, audioUrls, videoUrls);
    }

    /** Runs one LLM analysis per uploaded image (MIME assumed JPEG here). */
    @Action(
        description = "Analyze images",
        pre = {"mediaUploaded"},
        post = {"imagesAnalyzed"},
        outputBinding = "imageAnalyses"
    )
    public List<ImageAnalysis> analyzeImages(
            MediaUrls mediaUrls,
            @Provided Ai ai,
            @Provided ActionContext context
    ) {
        context.updateProgress("Analyzing " + mediaUrls.getImageUrls().size() + " images");
        List<ImageAnalysis> analyses = new ArrayList<>();
        for (String imageUrl : mediaUrls.getImageUrls()) {
            List<MultimodalContent> contents = List.of(
                MultimodalContent.text("Analyze this image in detail:"),
                MultimodalContent.image(imageUrl, "image/jpeg")
            );
            ImageAnalysis analysis = ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
                .createObject(contents);
            analyses.add(analysis);
        }
        return analyses;
    }

    /** Transcribes each uploaded audio file via the LLM (MIME assumed MP3). */
    @Action(
        description = "Transcribe audio files",
        pre = {"mediaUploaded"},
        post = {"audioTranscribed"},
        outputBinding = "transcripts"
    )
    public List<Transcript> transcribeAudio(
            MediaUrls mediaUrls,
            @Provided Ai ai,
            @Provided ActionContext context
    ) {
        context.updateProgress("Transcribing " + mediaUrls.getAudioUrls().size() + " audio files");
        List<Transcript> transcripts = new ArrayList<>();
        for (String audioUrl : mediaUrls.getAudioUrls()) {
            List<MultimodalContent> contents = List.of(
                MultimodalContent.text("Transcribe this audio:"),
                MultimodalContent.audio(audioUrl, "audio/mpeg")
            );
            Transcript transcript = ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
                .createObject(contents);
            transcripts.add(transcript);
        }
        return transcripts;
    }

    /** Produces one VideoAnalysis per uploaded video (MIME assumed MP4). */
    @Action(
        description = "Analyze video content",
        pre = {"mediaUploaded"},
        post = {"videosAnalyzed"},
        outputBinding = "videoAnalyses"
    )
    public List<VideoAnalysis> analyzeVideos(
            MediaUrls mediaUrls,
            @Provided Ai ai,
            @Provided ActionContext context
    ) {
        context.updateProgress("Analyzing " + mediaUrls.getVideoUrls().size() + " videos");
        List<VideoAnalysis> analyses = new ArrayList<>();
        for (String videoUrl : mediaUrls.getVideoUrls()) {
            List<MultimodalContent> contents = List.of(
                MultimodalContent.text("Analyze this video:"),
                MultimodalContent.video(videoUrl, "video/mp4")
            );
            VideoAnalysis analysis = ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
                .createObject(contents);
            analyses.add(analysis);
        }
        return analyses;
    }

    /**
     * Final synthesis step: interleaves per-modality findings (plus the
     * original image URLs) into one prompt and asks the LLM for the report.
     * FIX: the image loop is bounded by both list sizes so a length mismatch
     * between analyses and URLs cannot raise IndexOutOfBoundsException.
     * Assumes imageAnalyses.get(i) corresponds to getImageUrls().get(i) —
     * TODO confirm the framework preserves that ordering.
     */
    @AchievesGoal(
        description = "Generate comprehensive multimedia report",
        tags = {"multimedia", "analysis", "reporting"},
        export = @Export(remote = true, local = true),
        value = 100.0
    )
    @Action(
        description = "Generate final report",
        pre = {"imagesAnalyzed", "audioTranscribed", "videosAnalyzed"}
    )
    public MultimediaReport generateReport(
            MediaUrls mediaUrls,
            List<ImageAnalysis> imageAnalyses,
            List<Transcript> transcripts,
            List<VideoAnalysis> videoAnalyses,
            @Provided Ai ai,
            @Provided ActionContext context
    ) {
        context.updateProgress("Generating comprehensive report");
        // Build multimodal content for final synthesis
        List<MultimodalContent> contents = new ArrayList<>();
        contents.add(MultimodalContent.text(
            "Create a comprehensive report from the following analyzed content:"
        ));
        // Add image analyses paired with their source images
        contents.add(MultimodalContent.text("Images analyzed:"));
        int imageCount = Math.min(imageAnalyses.size(), mediaUrls.getImageUrls().size());
        for (int i = 0; i < imageCount; i++) {
            contents.add(MultimodalContent.text(
                "Image " + (i + 1) + ": " + imageAnalyses.get(i).getSummary()
            ));
            contents.add(MultimodalContent.image(
                mediaUrls.getImageUrls().get(i),
                "image/jpeg"
            ));
        }
        // Add transcripts
        contents.add(MultimodalContent.text("Audio transcripts:"));
        for (int i = 0; i < transcripts.size(); i++) {
            contents.add(MultimodalContent.text(
                "Audio " + (i + 1) + ": " + transcripts.get(i).getText()
            ));
        }
        // Add video analyses
        contents.add(MultimodalContent.text("Video analyses:"));
        for (int i = 0; i < videoAnalyses.size(); i++) {
            contents.add(MultimodalContent.text(
                "Video " + (i + 1) + ": " + videoAnalyses.get(i).getSummary()
            ));
        }
        // Generate final report
        MultimediaReport report = ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
            .createObject(contents);
        context.sendMessage(Message.info("Report generated successfully"));
        return report;
    }
}
// Data classes
// Immutable holder for uploaded media URLs, bucketed by modality; produced
// by uploadMedia and consumed by the downstream analysis actions.
class MediaUrls {
private final List<String> imageUrls;
private final List<String> audioUrls;
private final List<String> videoUrls;
public MediaUrls(
List<String> imageUrls,
List<String> audioUrls,
List<String> videoUrls
) {
this.imageUrls = imageUrls;
this.audioUrls = audioUrls;
this.videoUrls = videoUrls;
}
public List<String> getImageUrls() { return imageUrls; }
public List<String> getAudioUrls() { return audioUrls; }
public List<String> getVideoUrls() { return videoUrls; }
}import com.embabel.agent.api.*
import com.embabel.agent.api.annotation.*
// GOAP-planned moderation pipeline: fetch content, moderate text, images and
// videos independently, then fuse the three results into a final decision.
@Agent(
description = "Social media content moderator with multimodal analysis",
planner = PlannerType.GOAP
)
class ContentModeratorAgent(
private val contentStore: ContentStore,
// NOTE(review): moderationRules is injected but never referenced in this
// class — confirm whether it should feed the prompts below.
private val moderationRules: ModerationRules
) {
// Loads the post to moderate; its output is bound as "content" for the
// downstream actions.
@Action(
description = "Fetch content for moderation",
post = ["contentFetched"],
outputBinding = "content"
)
fun fetchContent(contentId: String): SocialContent {
return contentStore.fetch(contentId)
}
// Moderates only the post's text against the listed policy categories.
@Action(
description = "Analyze text content",
pre = ["contentFetched"],
post = ["textAnalyzed"],
outputBinding = "textAnalysis"
)
fun analyzeText(
content: SocialContent,
@Provided ai: Ai
): TextModerationResult {
val contents = listOf(
MultimodalContent.text(
"Moderate this social media post for: " +
"hate speech, violence, explicit content, misinformation"
),
MultimodalContent.text("Post text: ${content.text}")
)
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents)
}
// Moderates all attached images in one LLM call; short-circuits with an
// empty result when the post has no images.
@Action(
description = "Analyze image content",
pre = ["contentFetched"],
post = ["imagesAnalyzed"],
outputBinding = "imageAnalysis"
)
fun analyzeImages(
content: SocialContent,
@Provided ai: Ai,
@Provided context: ActionContext
): ImageModerationResult {
if (content.imageUrls.isEmpty()) {
return ImageModerationResult.empty()
}
context.updateProgress("Analyzing ${content.imageUrls.size} images")
val contents = buildList {
add(MultimodalContent.text(
"Moderate these images for: " +
"explicit content, violence, illegal activities, graphic content"
))
content.imageUrls.forEach { url ->
add(MultimodalContent.image(url, "image/jpeg"))
}
}
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents)
}
// Same pattern for attached videos (MIME assumed MP4).
@Action(
description = "Analyze video content",
pre = ["contentFetched"],
post = ["videosAnalyzed"],
outputBinding = "videoAnalysis"
)
fun analyzeVideos(
content: SocialContent,
@Provided ai: Ai,
@Provided context: ActionContext
): VideoModerationResult {
if (content.videoUrls.isEmpty()) {
return VideoModerationResult.empty()
}
context.updateProgress("Analyzing ${content.videoUrls.size} videos")
val contents = buildList {
add(MultimodalContent.text(
"Moderate these videos for policy violations"
))
content.videoUrls.forEach { url ->
add(MultimodalContent.video(url, "video/mp4"))
}
}
return ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject(contents)
}
// Goal action: fuses the three per-modality summaries into a single
// APPROVE / FLAG / REMOVE decision and notifies via the context.
@AchievesGoal(
description = "Complete content moderation decision",
tags = ["moderation", "safety", "compliance"],
export = Export(remote = true, local = true),
value = 100.0
)
@Action(
description = "Generate moderation decision",
pre = ["textAnalyzed", "imagesAnalyzed", "videosAnalyzed"]
)
fun generateDecision(
content: SocialContent,
textAnalysis: TextModerationResult,
imageAnalysis: ImageModerationResult,
videoAnalysis: VideoModerationResult,
@Provided ai: Ai,
@Provided context: ActionContext
): ModerationDecision {
context.updateProgress("Generating final moderation decision")
// Combine all analyses
val contents = listOf(
MultimodalContent.text(
"Based on the following moderation analyses, " +
"generate a final decision (APPROVE, FLAG, REMOVE):"
),
MultimodalContent.text("Text analysis: ${textAnalysis.summary}"),
MultimodalContent.text("Image analysis: ${imageAnalysis.summary}"),
MultimodalContent.text("Video analysis: ${videoAnalysis.summary}"),
MultimodalContent.text("Content ID: ${content.id}")
)
val decision = ai.withLlm(GeminiModels.GEMINI_2_5_PRO)
.createObject<ModerationDecision>(contents)
context.sendMessage(Message.info(
"Moderation decision: ${decision.action} - ${decision.reason}"
))
return decision
}
}
// Data classes
/**
 * A social media post under moderation: its text plus any attached media
 * URLs. The media lists default to empty so text-only posts need no extra
 * constructor arguments.
 */
data class SocialContent(
    val id: String,
    val text: String,
    val imageUrls: List<String> = listOf(),
    val videoUrls: List<String> = listOf(),
    val audioUrls: List<String> = listOf()
)
/** Outcome of moderating a post's text: summary, violations found, severity. */
data class TextModerationResult(
    val summary: String,
    val violations: List<String>,
    val severity: Severity
)
/**
 * Outcome of moderating a post's images. Use [empty] when the post carries
 * no images so the pipeline still produces a result.
 */
data class ImageModerationResult(
    val summary: String,
    val violations: List<String>,
    val severity: Severity
) {
    companion object {
        /** Neutral result for posts with nothing to analyze. */
        fun empty() = ImageModerationResult("No images", emptyList(), Severity.NONE)
    }
}
/**
 * Outcome of moderating a post's videos. Use [empty] when the post carries
 * no videos so the pipeline still produces a result.
 */
data class VideoModerationResult(
    val summary: String,
    val violations: List<String>,
    val severity: Severity
) {
    companion object {
        /** Neutral result for posts with nothing to analyze. */
        fun empty() = VideoModerationResult("No videos", emptyList(), Severity.NONE)
    }
}
/**
 * Final verdict produced by generateDecision: the action to take, the
 * model's stated reason, and its confidence in the call.
 */
data class ModerationDecision(
    val action: ModerationAction,
    val reason: String,
    val confidence: Double
)
/** Possible final verdicts a moderation decision can carry. */
enum class ModerationAction {
    APPROVE, FLAG, REMOVE
}
// Severity scale shared by all moderation result types above.
enum class Severity {
NONE, LOW, MEDIUM, HIGH, CRITICAL
}MultimodalContent.text(content) - Create text contentMultimodalContent.image(url, mimeType) - Create image contentMultimodalContent.audio(url, mimeType) - Create audio contentMultimodalContent.video(url, mimeType) - Create video contentMultimodalContent.file(path, mimeType) - Create file contentImages: image/jpeg, image/png, image/gif, image/webp, image/bmp
Audio: audio/mpeg (MP3), audio/wav, audio/ogg, audio/flac, audio/aac
Video: video/mp4, video/webm, video/quicktime (MOV), video/x-msvideo (AVI)
Documents: application/pdf, application/vnd.ms-excel, application/vnd.openxmlformats-officedocument.wordprocessingml.document
Install: tessl i tessl/maven-com-embabel-agent--embabel-agent-starter@0.3.1 (docs)