Content Moderation

Content safety and moderation capabilities for detecting harmful content, including hate speech, violence, sexual content, self-harm, and other policy violations, with category-specific flags and confidence scores.

Capabilities

ModerationModel Interface

Main interface for content moderation.

public interface ModerationModel extends Model<ModerationPrompt, ModerationResponse> {
    /**
     * Check content for policy violations.
     *
     * @param request the moderation prompt
     * @return the moderation response with flags and scores
     */
    ModerationResponse call(ModerationPrompt request);
}
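
A minimal sketch of calling the interface (the ModerationModel implementation is provider-specific, e.g. supplied by a Spring AI provider module, and is assumed to be available):

// Treats any flagged result as unsafe; assumes an injected ModerationModel
boolean isFlagged(ModerationModel moderationModel, String text) {
    ModerationResponse response = moderationModel.call(new ModerationPrompt(text));
    return response.getResult().getOutput().getResults().stream()
        .anyMatch(ModerationResult::isFlagged);
}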

ModerationPrompt

Request for content moderation.

public class ModerationPrompt implements ModelRequest<ModerationMessage> {
    /**
     * Construct a ModerationPrompt from text.
     *
     * @param text the text to moderate
     */
    public ModerationPrompt(String text);

    /**
     * Construct a ModerationPrompt from a message.
     *
     * @param message the moderation message
     */
    public ModerationPrompt(ModerationMessage message);

    /**
     * Construct a ModerationPrompt with message and options.
     *
     * @param message the moderation message
     * @param options the moderation options
     */
    public ModerationPrompt(ModerationMessage message, ModerationOptions options);

    /**
     * Get the moderation message.
     *
     * @return the message
     */
    ModerationMessage getInstructions();

    /**
     * Get the moderation options.
     *
     * @return the options
     */
    ModerationOptions getOptions();
}
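
A quick sketch of the first two constructors; the third additionally takes a provider-specific ModerationOptions (see the sketch under "ModerationOptions Interface" below):

// From plain text
ModerationPrompt fromText = new ModerationPrompt("Text to moderate");

// From a ModerationMessage (equivalent for a single text input)
ModerationPrompt fromMessage = new ModerationPrompt(new ModerationMessage("Text to moderate"));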

ModerationResponse

Response containing moderation results.

public class ModerationResponse implements ModelResponse<Generation> {
    /**
     * Construct a ModerationResponse with a generation.
     *
     * @param generation the moderation generation
     */
    public ModerationResponse(Generation generation);

    /**
     * Construct a ModerationResponse with multiple generations.
     *
     * @param generations the list of moderation generations
     */
    public ModerationResponse(List<Generation> generations);

    /**
     * Construct a ModerationResponse with generations and metadata.
     *
     * @param generations the list of moderation generations
     * @param metadata the response metadata
     */
    public ModerationResponse(
        List<Generation> generations,
        ModerationResponseMetadata metadata
    );

    /**
     * Get the first moderation result.
     *
     * @return the first generation
     */
    Generation getResult();

    /**
     * Get all moderation results.
     *
     * @return list of generations
     */
    List<Generation> getResults();

    /**
     * Get response metadata.
     *
     * @return the moderation response metadata
     */
    ModerationResponseMetadata getMetadata();
}
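
Note the two levels of unwrapping: getResult() returns a Generation, its getOutput() returns a Moderation, and the per-input flags live on the ModerationResult entries inside it. A sketch:

ModerationResponse response = moderationModel.call(new ModerationPrompt("text to check"));
Generation generation = response.getResult();    // first generation
Moderation moderation = generation.getOutput();  // provider-level result wrapper
for (ModerationResult result : moderation.getResults()) {
    System.out.println("Flagged: " + result.isFlagged());
}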

Generation

Single moderation result.

public class Generation implements ModelResult<Moderation> {
    /**
     * Construct a Generation with moderation.
     *
     * @param moderation the moderation result
     */
    public Generation(Moderation moderation);

    /**
     * Construct a Generation with moderation and metadata.
     *
     * @param moderation the moderation result
     * @param metadata the generation metadata
     */
    public Generation(Moderation moderation, ModerationGenerationMetadata metadata);

    /**
     * Get the moderation result.
     *
     * @return the moderation
     */
    Moderation getOutput();

    /**
     * Get generation metadata.
     *
     * @return the moderation generation metadata
     */
    ModerationGenerationMetadata getMetadata();
}

Moderation

Detailed moderation result with flags and scores.

public final class Moderation {
    /**
     * Get the result ID.
     *
     * @return the ID
     */
    String getId();

    /**
     * Get the model name used for moderation.
     *
     * @return the model name
     */
    String getModel();

    /**
     * Get the list of moderation results.
     *
     * @return list of moderation results
     */
    List<ModerationResult> getResults();

    /**
     * Create a new moderation builder.
     *
     * @return a new builder
     */
    static Builder builder();

    /**
     * Builder for constructing Moderation instances.
     */
    public static final class Builder {
        /**
         * Set the moderation ID.
         *
         * @param id the ID
         * @return this builder
         */
        Builder id(String id);

        /**
         * Set the model name.
         *
         * @param model the model name
         * @return this builder
         */
        Builder model(String model);

        /**
         * Set the moderation results.
         *
         * @param results the list of moderation results
         * @return this builder
         */
        Builder results(List<ModerationResult> results);

        /**
         * Build the Moderation instance.
         *
         * @return the moderation
         */
        Moderation build();
    }
}
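
The builder is mainly useful for constructing Moderation values by hand, for example as a test stub; a sketch (the ID and model name are hypothetical):

Moderation stub = Moderation.builder()
    .id("modr-test-1")              // hypothetical ID
    .model("test-moderation-model") // hypothetical model name
    .results(List.of(ModerationResult.builder().flagged(false).build()))
    .build();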

ModerationResult

Individual moderation result indicating whether content was flagged, with detailed categories and scores.

public final class ModerationResult {
    /**
     * Check if content was flagged as violating moderation policies.
     *
     * @return true if content was flagged
     */
    boolean isFlagged();

    /**
     * Set whether content was flagged.
     *
     * @param flagged true if flagged
     */
    void setFlagged(boolean flagged);

    /**
     * Get the categories of moderation violations.
     *
     * @return the categories
     */
    Categories getCategories();

    /**
     * Set the categories of moderation violations.
     *
     * @param categories the categories
     */
    void setCategories(Categories categories);

    /**
     * Get the detailed scores for each category.
     *
     * @return the category scores
     */
    CategoryScores getCategoryScores();

    /**
     * Set the detailed scores for each category.
     *
     * @param categoryScores the category scores
     */
    void setCategoryScores(CategoryScores categoryScores);

    /**
     * Create a new moderation result builder.
     *
     * @return a new builder
     */
    static Builder builder();

    /**
     * Builder for constructing ModerationResult instances.
     */
    public static final class Builder {
        /**
         * Set whether content was flagged.
         *
         * @param flagged true if flagged
         * @return this builder
         */
        Builder flagged(boolean flagged);

        /**
         * Set the categories.
         *
         * @param categories the categories
         * @return this builder
         */
        Builder categories(Categories categories);

        /**
         * Set the category scores.
         *
         * @param categoryScores the category scores
         * @return this builder
         */
        Builder categoryScores(CategoryScores categoryScores);

        /**
         * Build the ModerationResult instance.
         *
         * @return the moderation result
         */
        ModerationResult build();
    }
}
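
A sketch of assembling a flagged result by hand, using the Categories and CategoryScores builders documented below:

ModerationResult flagged = ModerationResult.builder()
    .flagged(true)
    .categories(Categories.builder().hate(true).build())
    .categoryScores(CategoryScores.builder().hate(0.97).build())
    .build();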

Categories

Boolean flags for each moderation category.

public final class Categories {
    /**
     * Check if content is sexual in nature.
     *
     * @return true if sexual content detected
     */
    boolean isSexual();

    /**
     * Check if content contains hate speech.
     *
     * @return true if hate speech detected
     */
    boolean isHate();

    /**
     * Check if content contains harassment.
     *
     * @return true if harassment detected
     */
    boolean isHarassment();

    /**
     * Check if content relates to self-harm.
     *
     * @return true if self-harm content detected
     */
    boolean isSelfHarm();

    /**
     * Check if content contains sexual content involving minors.
     *
     * @return true if sexual/minors content detected
     */
    boolean isSexualMinors();

    /**
     * Check if content contains hate speech with threatening language.
     *
     * @return true if hate/threatening detected
     */
    boolean isHateThreatening();

    /**
     * Check if content contains graphic violence.
     *
     * @return true if graphic violence detected
     */
    boolean isViolenceGraphic();

    /**
     * Check if content expresses intent to self-harm.
     *
     * @return true if self-harm intent detected
     */
    boolean isSelfHarmIntent();

    /**
     * Check if content contains instructions for self-harm.
     *
     * @return true if self-harm instructions detected
     */
    boolean isSelfHarmInstructions();

    /**
     * Check if content contains threatening harassment.
     *
     * @return true if harassment/threatening detected
     */
    boolean isHarassmentThreatening();

    /**
     * Check if content contains violence.
     *
     * @return true if violence detected
     */
    boolean isViolence();

    /**
     * Check if content contains dangerous and criminal content.
     *
     * @return true if dangerous/criminal content detected
     */
    boolean isDangerousAndCriminalContent();

    /**
     * Check if content relates to health.
     *
     * @return true if health content detected
     */
    boolean isHealth();

    /**
     * Check if content relates to financial matters.
     *
     * @return true if financial content detected
     */
    boolean isFinancial();

    /**
     * Check if content relates to law.
     *
     * @return true if law content detected
     */
    boolean isLaw();

    /**
     * Check if content contains personally identifiable information.
     *
     * @return true if PII detected
     */
    boolean isPii();

    /**
     * Create a new categories builder.
     *
     * @return a new builder
     */
    static Builder builder();

    /**
     * Builder for constructing Categories instances.
     */
    public static final class Builder {
        Builder sexual(boolean sexual);
        Builder hate(boolean hate);
        Builder harassment(boolean harassment);
        Builder selfHarm(boolean selfHarm);
        Builder sexualMinors(boolean sexualMinors);
        Builder hateThreatening(boolean hateThreatening);
        Builder violenceGraphic(boolean violenceGraphic);
        Builder selfHarmIntent(boolean selfHarmIntent);
        Builder selfHarmInstructions(boolean selfHarmInstructions);
        Builder harassmentThreatening(boolean harassmentThreatening);
        Builder violence(boolean violence);
        Builder dangerousAndCriminalContent(boolean dangerousAndCriminalContent);
        Builder health(boolean health);
        Builder financial(boolean financial);
        Builder law(boolean law);
        Builder pii(boolean pii);
        Categories build();
    }
}

CategoryScores

Confidence scores for each moderation category (0.0 to 1.0).

public final class CategoryScores {
    /**
     * Get sexual content score.
     *
     * @return score from 0.0 (low confidence) to 1.0 (high confidence)
     */
    double getSexual();

    /**
     * Get hate speech score.
     *
     * @return score from 0.0 to 1.0
     */
    double getHate();

    /**
     * Get harassment score.
     *
     * @return score from 0.0 to 1.0
     */
    double getHarassment();

    /**
     * Get self-harm score.
     *
     * @return score from 0.0 to 1.0
     */
    double getSelfHarm();

    /**
     * Get sexual/minors content score.
     *
     * @return score from 0.0 to 1.0
     */
    double getSexualMinors();

    /**
     * Get hate/threatening score.
     *
     * @return score from 0.0 to 1.0
     */
    double getHateThreatening();

    /**
     * Get graphic violence score.
     *
     * @return score from 0.0 to 1.0
     */
    double getViolenceGraphic();

    /**
     * Get self-harm intent score.
     *
     * @return score from 0.0 to 1.0
     */
    double getSelfHarmIntent();

    /**
     * Get self-harm instructions score.
     *
     * @return score from 0.0 to 1.0
     */
    double getSelfHarmInstructions();

    /**
     * Get harassment/threatening score.
     *
     * @return score from 0.0 to 1.0
     */
    double getHarassmentThreatening();

    /**
     * Get violence score.
     *
     * @return score from 0.0 to 1.0
     */
    double getViolence();

    /**
     * Get dangerous and criminal content score.
     *
     * @return score from 0.0 to 1.0
     */
    double getDangerousAndCriminalContent();

    /**
     * Get health score.
     *
     * @return score from 0.0 to 1.0
     */
    double getHealth();

    /**
     * Get financial score.
     *
     * @return score from 0.0 to 1.0
     */
    double getFinancial();

    /**
     * Get law score.
     *
     * @return score from 0.0 to 1.0
     */
    double getLaw();

    /**
     * Get PII score.
     *
     * @return score from 0.0 to 1.0
     */
    double getPii();

    /**
     * Create a new category scores builder.
     *
     * @return a new builder
     */
    static Builder builder();

    /**
     * Builder for constructing CategoryScores instances.
     */
    public static final class Builder {
        Builder sexual(double sexual);
        Builder hate(double hate);
        Builder harassment(double harassment);
        Builder selfHarm(double selfHarm);
        Builder sexualMinors(double sexualMinors);
        Builder hateThreatening(double hateThreatening);
        Builder violenceGraphic(double violenceGraphic);
        Builder selfHarmIntent(double selfHarmIntent);
        Builder selfHarmInstructions(double selfHarmInstructions);
        Builder harassmentThreatening(double harassmentThreatening);
        Builder violence(double violence);
        Builder dangerousAndCriminalContent(double dangerousAndCriminalContent);
        Builder health(double health);
        Builder financial(double financial);
        Builder law(double law);
        Builder pii(double pii);
        CategoryScores build();
    }
}
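
Because each category has its own getter, aggregating scores means enumerating them explicitly; a sketch that takes the maximum across all sixteen categories:

import java.util.stream.DoubleStream;

// Highest score across all sixteen categories, for coarse risk ranking
double maxScore(CategoryScores s) {
    return DoubleStream.of(
            s.getSexual(), s.getHate(), s.getHarassment(), s.getSelfHarm(),
            s.getSexualMinors(), s.getHateThreatening(), s.getViolenceGraphic(),
            s.getSelfHarmIntent(), s.getSelfHarmInstructions(), s.getHarassmentThreatening(),
            s.getViolence(), s.getDangerousAndCriminalContent(), s.getHealth(),
            s.getFinancial(), s.getLaw(), s.getPii())
        .max().orElse(0.0);
}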

ModerationMessage

Message containing text to moderate.

public class ModerationMessage {
    /**
     * Construct a ModerationMessage with text.
     *
     * @param text the text to moderate
     */
    public ModerationMessage(String text);

    /**
     * Get the text content.
     *
     * @return the text
     */
    String getText();
}

ModerationOptions Interface

Options for configuring moderation.

public interface ModerationOptions extends ModelOptions {
    /**
     * Get the model name to use for moderation.
     *
     * @return the model name
     */
    String getModel();
}
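
Concrete options classes ship with each provider module; a minimal hand-rolled sketch (the class name and model name here are illustrative, not part of the API):

public class SimpleModerationOptions implements ModerationOptions {

    private final String model;

    public SimpleModerationOptions(String model) {
        this.model = model;
    }

    @Override
    public String getModel() {
        return this.model;
    }
}

// Usage with the options-taking ModerationPrompt constructor
ModerationPrompt prompt = new ModerationPrompt(
    new ModerationMessage("text to check"),
    new SimpleModerationOptions("my-moderation-model"));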

ModerationGenerationMetadata Interface

Metadata for moderation generation.

public interface ModerationGenerationMetadata extends ResultMetadata {
    // Generation-specific metadata
}

ModerationResponseMetadata

Metadata for moderation responses.

public class ModerationResponseMetadata extends AbstractResponseMetadata {
    // Response-level metadata
}

Usage Examples

Simple Content Moderation

import org.springframework.ai.moderation.Moderation;
import org.springframework.ai.moderation.ModerationModel;
import org.springframework.ai.moderation.ModerationPrompt;
import org.springframework.ai.moderation.ModerationResponse;
import org.springframework.ai.moderation.ModerationResult;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

@Service
public class ModerationService {
    @Autowired
    private ModerationModel moderationModel;

    public boolean isContentSafe(String content) {
        ModerationPrompt prompt = new ModerationPrompt(content);
        ModerationResponse response = moderationModel.call(prompt);
        Moderation moderation = response.getResult().getOutput();

        // Returns false if any result is flagged
        return moderation.getResults().stream()
            .noneMatch(ModerationResult::isFlagged);
    }
}

Detailed Moderation Check

ModerationPrompt prompt = new ModerationPrompt("Content to check");
ModerationResponse response = moderationModel.call(prompt);
Moderation moderation = response.getResult().getOutput();
ModerationResult result = moderation.getResults().get(0);

// Check if flagged
if (result.isFlagged()) {
    System.out.println("Content flagged!");

    // Check specific categories
    Categories categories = result.getCategories();

    if (categories.isHate()) {
        System.out.println("Contains hate speech");
    }
    if (categories.isViolence()) {
        System.out.println("Contains violence");
    }
    if (categories.isSexual()) {
        System.out.println("Contains sexual content");
    }
    if (categories.isSelfHarm()) {
        System.out.println("Contains self-harm content");
    }
    if (categories.isHarassment()) {
        System.out.println("Contains harassment");
    }
}

Checking Category Scores

ModerationPrompt prompt = new ModerationPrompt("Content to analyze");
ModerationResponse response = moderationModel.call(prompt);
Moderation moderation = response.getResult().getOutput();

// Get confidence scores from the first result
CategoryScores scores = moderation.getResults().get(0).getCategoryScores();

System.out.println("Moderation Scores:");
System.out.println("Hate: " + scores.getHate());
System.out.println("Violence: " + scores.getViolence());
System.out.println("Sexual: " + scores.getSexual());
System.out.println("Self-harm: " + scores.getSelfHarm());
System.out.println("Harassment: " + scores.getHarassment());

// Use custom threshold
double threshold = 0.5;
if (scores.getHate() > threshold) {
    System.out.println("High confidence hate speech detected");
}

Custom Thresholds

@Service
public class CustomModerationService {
    private final ModerationModel moderationModel;
    private static final double HIGH_RISK_THRESHOLD = 0.8;
    private static final double MEDIUM_RISK_THRESHOLD = 0.5;

    public CustomModerationService(ModerationModel moderationModel) {
        this.moderationModel = moderationModel;
    }

    public ModerationDecision moderateContent(String content) {
        ModerationPrompt prompt = new ModerationPrompt(content);
        ModerationResponse response = moderationModel.call(prompt);
        Moderation moderation = response.getResult().getOutput();

        CategoryScores scores = moderation.getResults().get(0).getCategoryScores();

        // Determine risk level
        double maxScore = getMaxScore(scores);

        if (maxScore >= HIGH_RISK_THRESHOLD) {
            return new ModerationDecision("BLOCKED", "High risk content");
        } else if (maxScore >= MEDIUM_RISK_THRESHOLD) {
            return new ModerationDecision("REVIEW", "Requires review");
        } else {
            return new ModerationDecision("ALLOWED", "Content is safe");
        }
    }

    private double getMaxScore(CategoryScores scores) {
        return Math.max(
            Math.max(scores.getHate(), scores.getViolence()),
            Math.max(scores.getSexual(),
                Math.max(scores.getSelfHarm(), scores.getHarassment())
            )
        );
    }

    // Named ModerationDecision to avoid clashing with the API's ModerationResult
    record ModerationDecision(String action, String reason) {}
}

Batch Moderation

public List<TextModerationResult> moderateMultipleTexts(List<String> texts) {
    List<TextModerationResult> results = new ArrayList<>();

    for (String text : texts) {
        ModerationPrompt prompt = new ModerationPrompt(text);
        ModerationResponse response = moderationModel.call(prompt);
        ModerationResult result = response.getResult().getOutput().getResults().get(0);

        results.add(new TextModerationResult(
            text,
            result.isFlagged(),
            result.getCategories()
        ));
    }

    return results;
}

// Named TextModerationResult to avoid clashing with the API's ModerationResult
record TextModerationResult(String text, boolean flagged, Categories categories) {}

Chat Message Moderation

@Service
public class ChatModerationService {
    private final ModerationModel moderationModel;

    public ChatModerationService(ModerationModel moderationModel) {
        this.moderationModel = moderationModel;
    }

    public boolean canSendMessage(String message) {
        ModerationPrompt prompt = new ModerationPrompt(message);
        ModerationResponse response = moderationModel.call(prompt);
        ModerationResult result = response.getResult().getOutput().getResults().get(0);

        // Block flagged content
        if (result.isFlagged()) {
            Categories categories = result.getCategories();

            // Log which categories were flagged
            logViolations(categories);

            return false;
        }

        return true;
    }

    private void logViolations(Categories categories) {
        List<String> violations = new ArrayList<>();

        if (categories.isHate()) violations.add("hate");
        if (categories.isViolence()) violations.add("violence");
        if (categories.isSexual()) violations.add("sexual");
        if (categories.isSelfHarm()) violations.add("self-harm");
        if (categories.isHarassment()) violations.add("harassment");

        System.out.println("Content blocked for: " + String.join(", ", violations));
    }
}

REST API Example

@RestController
@RequestMapping("/api/moderation")
public class ModerationController {
    private final ModerationModel moderationModel;

    public ModerationController(ModerationModel moderationModel) {
        this.moderationModel = moderationModel;
    }

    @PostMapping("/check")
    public ModerationCheckResult checkContent(@RequestBody ModerationCheckRequest request) {
        ModerationPrompt prompt = new ModerationPrompt(request.text());
        ModerationResponse response = moderationModel.call(prompt);
        ModerationResult result = response.getResult().getOutput().getResults().get(0);

        return new ModerationCheckResult(
            result.isFlagged(),
            convertCategories(result.getCategories()),
            convertScores(result.getCategoryScores())
        );
    }

    private Map<String, Boolean> convertCategories(Categories categories) {
        return Map.of(
            "hate", categories.isHate(),
            "violence", categories.isViolence(),
            "sexual", categories.isSexual(),
            "selfHarm", categories.isSelfHarm(),
            "harassment", categories.isHarassment()
        );
    }

    private Map<String, Double> convertScores(CategoryScores scores) {
        return Map.of(
            "hate", scores.getHate(),
            "violence", scores.getViolence(),
            "sexual", scores.getSexual(),
            "selfHarm", scores.getSelfHarm(),
            "harassment", scores.getHarassment()
        );
    }

    record ModerationCheckRequest(String text) {}

    record ModerationCheckResult(
        boolean flagged,
        Map<String, Boolean> categories,
        Map<String, Double> scores
    ) {}
}

Specific Category Checks

public class CategoryChecker {
    private final ModerationModel moderationModel;

    public CategoryChecker(ModerationModel moderationModel) {
        this.moderationModel = moderationModel;
    }

    public boolean containsHateSpeech(String text) {
        ModerationResult mod = moderate(text);
        return mod.getCategories().isHate();
    }

    public boolean containsViolence(String text) {
        ModerationResult mod = moderate(text);
        return mod.getCategories().isViolence();
    }

    public boolean containsSexualContent(String text) {
        ModerationResult mod = moderate(text);
        return mod.getCategories().isSexual();
    }

    public boolean containsSelfHarm(String text) {
        ModerationResult mod = moderate(text);
        return mod.getCategories().isSelfHarm();
    }

    private ModerationResult moderate(String text) {
        ModerationPrompt prompt = new ModerationPrompt(text);
        ModerationResponse response = moderationModel.call(prompt);
        return response.getResult().getOutput().getResults().get(0);
    }
}

Moderation Report

public class ModerationReport {
    private final ModerationModel moderationModel;

    public ModerationReport(ModerationModel moderationModel) {
        this.moderationModel = moderationModel;
    }

    public String generateReport(String content) {
        ModerationPrompt prompt = new ModerationPrompt(content);
        ModerationResponse response = moderationModel.call(prompt);
        Moderation moderation = response.getResult().getOutput();
        ModerationResult result = moderation.getResults().get(0);

        StringBuilder report = new StringBuilder();
        report.append("=== MODERATION REPORT ===\n");
        report.append("Model: ").append(moderation.getModel()).append("\n");
        report.append("Flagged: ").append(result.isFlagged()).append("\n\n");

        report.append("Categories:\n");
        Categories cats = result.getCategories();
        report.append("  Hate: ").append(cats.isHate()).append("\n");
        report.append("  Violence: ").append(cats.isViolence()).append("\n");
        report.append("  Sexual: ").append(cats.isSexual()).append("\n");
        report.append("  Self-harm: ").append(cats.isSelfHarm()).append("\n");
        report.append("  Harassment: ").append(cats.isHarassment()).append("\n\n");

        report.append("Scores:\n");
        CategoryScores scores = result.getCategoryScores();
        report.append(String.format("  Hate: %.3f\n", scores.getHate()));
        report.append(String.format("  Violence: %.3f\n", scores.getViolence()));
        report.append(String.format("  Sexual: %.3f\n", scores.getSexual()));
        report.append(String.format("  Self-harm: %.3f\n", scores.getSelfHarm()));
        report.append(String.format("  Harassment: %.3f\n", scores.getHarassment()));

        return report.toString();
    }
}

Error Handling

public ModerationOutcome safeModerate(String content) {
    try {
        ModerationPrompt prompt = new ModerationPrompt(content);
        ModerationResponse response = moderationModel.call(prompt);
        ModerationResult result = response.getResult().getOutput().getResults().get(0);

        return new ModerationOutcome(
            true,
            result.isFlagged(),
            result.getCategories()
        );
    } catch (Exception e) {
        System.err.println("Moderation failed: " + e.getMessage());

        // Fail closed: treat content as flagged when moderation is unavailable
        return new ModerationOutcome(false, true, null);
    }
}

// Named ModerationOutcome to avoid clashing with the API's ModerationResult
record ModerationOutcome(boolean success, boolean flagged, Categories categories) {}

Install with Tessl CLI

npx tessl i tessl/maven-org-springframework-ai--spring-ai-model@1.1.1
