Content Moderation

Content safety and moderation capabilities for detecting harmful content, including hate speech, violence, sexual content, self-harm, and other policy violations, with category-specific flags and confidence scores.

Capabilities

ModerationModel Interface

Main interface for content moderation.

public interface ModerationModel extends Model<ModerationPrompt, ModerationResponse> {
    /**
     * Check content for policy violations.
     *
     * @param request the moderation prompt
     * @return the moderation response with flags and scores
     */
    ModerationResponse call(ModerationPrompt request);
}
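
A minimal sketch of calling the interface (the ModerationModel implementation is provider-specific, e.g. supplied by a Spring AI provider module, and is assumed to be available):

// Treats any flagged result as unsafe; assumes an injected ModerationModel
boolean isFlagged(ModerationModel moderationModel, String text) {
    ModerationResponse response = moderationModel.call(new ModerationPrompt(text));
    return response.getResult().getOutput().getResults().stream()
        .anyMatch(ModerationResult::isFlagged);
}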

ModerationPrompt

Request for content moderation.

public class ModerationPrompt implements ModelRequest<ModerationMessage> {
    /**
     * Construct a ModerationPrompt from text.
     *
     * @param text the text to moderate
     */
    public ModerationPrompt(String text);

    /**
     * Construct a ModerationPrompt from a message.
     *
     * @param message the moderation message
     */
    public ModerationPrompt(ModerationMessage message);

    /**
     * Construct a ModerationPrompt with message and options.
     *
     * @param message the moderation message
     * @param options the moderation options
     */
    public ModerationPrompt(ModerationMessage message, ModerationOptions options);

    /**
     * Get the moderation message.
     *
     * @return the message
     */
    ModerationMessage getInstructions();

    /**
     * Get the moderation options.
     *
     * @return the options
     */
    ModerationOptions getOptions();
}
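
A quick sketch of the first two constructors; the third additionally takes a provider-specific ModerationOptions (see the sketch under "ModerationOptions Interface" below):

// From plain text
ModerationPrompt fromText = new ModerationPrompt("Text to moderate");

// From a ModerationMessage (equivalent for a single text input)
ModerationPrompt fromMessage = new ModerationPrompt(new ModerationMessage("Text to moderate"));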

ModerationResponse

Response containing moderation results.

public class ModerationResponse implements ModelResponse<Generation> {
    /**
     * Construct a ModerationResponse with a generation.
     *
     * @param generation the moderation generation
     */
    public ModerationResponse(Generation generation);

    /**
     * Construct a ModerationResponse with multiple generations.
     *
     * @param generations the list of moderation generations
     */
    public ModerationResponse(List<Generation> generations);

    /**
     * Construct a ModerationResponse with generations and metadata.
     *
     * @param generations the list of moderation generations
     * @param metadata the response metadata
     */
    public ModerationResponse(
        List<Generation> generations,
        ModerationResponseMetadata metadata
    );

    /**
     * Get the first moderation result.
     *
     * @return the first generation
     */
    Generation getResult();

    /**
     * Get all moderation results.
     *
     * @return list of generations
     */
    List<Generation> getResults();

    /**
     * Get response metadata.
     *
     * @return the moderation response metadata
     */
    ModerationResponseMetadata getMetadata();
}
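
Note the two levels of unwrapping: getResult() returns a Generation, its getOutput() returns a Moderation, and the per-input flags live on the ModerationResult entries inside it. A sketch:

ModerationResponse response = moderationModel.call(new ModerationPrompt("text to check"));
Generation generation = response.getResult();    // first generation
Moderation moderation = generation.getOutput();  // provider-level result wrapper
for (ModerationResult result : moderation.getResults()) {
    System.out.println("Flagged: " + result.isFlagged());
}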

Generation

Single moderation result.

public class Generation implements ModelResult<Moderation> {
    /**
     * Construct a Generation with moderation.
     *
     * @param moderation the moderation result
     */
    public Generation(Moderation moderation);

    /**
     * Construct a Generation with moderation and metadata.
     *
     * @param moderation the moderation result
     * @param metadata the generation metadata
     */
    public Generation(Moderation moderation, ModerationGenerationMetadata metadata);

    /**
     * Get the moderation result.
     *
     * @return the moderation
     */
    Moderation getOutput();

    /**
     * Get generation metadata.
     *
     * @return the moderation generation metadata
     */
    ModerationGenerationMetadata getMetadata();
}

Moderation

Detailed moderation result with flags and scores.

public final class Moderation {
    /**
     * Get the result ID.
     *
     * @return the ID
     */
    String getId();

    /**
     * Get the model name used for moderation.
     *
     * @return the model name
     */
    String getModel();

    /**
     * Get the list of moderation results.
     *
     * @return list of moderation results
     */
    List<ModerationResult> getResults();

    /**
     * Create a new moderation builder.
     *
     * @return a new builder
     */
    static Builder builder();

    /**
     * Builder for constructing Moderation instances.
     */
    public static final class Builder {
        /**
         * Set the moderation ID.
         *
         * @param id the ID
         * @return this builder
         */
        Builder id(String id);

        /**
         * Set the model name.
         *
         * @param model the model name
         * @return this builder
         */
        Builder model(String model);

        /**
         * Set the moderation results.
         *
         * @param results the list of moderation results
         * @return this builder
         */
        Builder results(List<ModerationResult> results);

        /**
         * Build the Moderation instance.
         *
         * @return the moderation
         */
        Moderation build();
    }
}
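
The builder is mainly useful for constructing Moderation values by hand, for example as a test stub; a sketch (the ID and model name are hypothetical):

Moderation stub = Moderation.builder()
    .id("modr-test-1")              // hypothetical ID
    .model("test-moderation-model") // hypothetical model name
    .results(List.of(ModerationResult.builder().flagged(false).build()))
    .build();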

ModerationResult

Individual moderation result indicating whether content was flagged, with detailed categories and scores.

public final class ModerationResult {
    /**
     * Check if content was flagged as violating moderation policies.
     *
     * @return true if content was flagged
     */
    boolean isFlagged();

    /**
     * Set whether content was flagged.
     *
     * @param flagged true if flagged
     */
    void setFlagged(boolean flagged);

    /**
     * Get the categories of moderation violations.
     *
     * @return the categories
     */
    Categories getCategories();

    /**
     * Set the categories of moderation violations.
     *
     * @param categories the categories
     */
    void setCategories(Categories categories);

    /**
     * Get the detailed scores for each category.
     *
     * @return the category scores
     */
    CategoryScores getCategoryScores();

    /**
     * Set the detailed scores for each category.
     *
     * @param categoryScores the category scores
     */
    void setCategoryScores(CategoryScores categoryScores);

    /**
     * Create a new moderation result builder.
     *
     * @return a new builder
     */
    static Builder builder();

    /**
     * Builder for constructing ModerationResult instances.
     */
    public static final class Builder {
        /**
         * Set whether content was flagged.
         *
         * @param flagged true if flagged
         * @return this builder
         */
        Builder flagged(boolean flagged);

        /**
         * Set the categories.
         *
         * @param categories the categories
         * @return this builder
         */
        Builder categories(Categories categories);

        /**
         * Set the category scores.
         *
         * @param categoryScores the category scores
         * @return this builder
         */
        Builder categoryScores(CategoryScores categoryScores);

        /**
         * Build the ModerationResult instance.
         *
         * @return the moderation result
         */
        ModerationResult build();
    }
}
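
A sketch of assembling a flagged result by hand, using the Categories and CategoryScores builders documented below:

ModerationResult flagged = ModerationResult.builder()
    .flagged(true)
    .categories(Categories.builder().hate(true).build())
    .categoryScores(CategoryScores.builder().hate(0.97).build())
    .build();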

Categories

Boolean flags for each moderation category.

public final class Categories {
    /**
     * Check if content is sexual in nature.
     *
     * @return true if sexual content detected
     */
    boolean isSexual();

    /**
     * Check if content contains hate speech.
     *
     * @return true if hate speech detected
     */
    boolean isHate();

    /**
     * Check if content contains harassment.
     *
     * @return true if harassment detected
     */
    boolean isHarassment();

    /**
     * Check if content relates to self-harm.
     *
     * @return true if self-harm content detected
     */
    boolean isSelfHarm();

    /**
     * Check if content contains sexual content involving minors.
     *
     * @return true if sexual/minors content detected
     */
    boolean isSexualMinors();

    /**
     * Check if content contains hate speech with threatening language.
     *
     * @return true if hate/threatening detected
     */
    boolean isHateThreatening();

    /**
     * Check if content contains graphic violence.
     *
     * @return true if graphic violence detected
     */
    boolean isViolenceGraphic();

    /**
     * Check if content expresses intent to self-harm.
     *
     * @return true if self-harm intent detected
     */
    boolean isSelfHarmIntent();

    /**
     * Check if content contains instructions for self-harm.
     *
     * @return true if self-harm instructions detected
     */
    boolean isSelfHarmInstructions();

    /**
     * Check if content contains threatening harassment.
     *
     * @return true if harassment/threatening detected
     */
    boolean isHarassmentThreatening();

    /**
     * Check if content contains violence.
     *
     * @return true if violence detected
     */
    boolean isViolence();

    /**
     * Check if content contains dangerous and criminal content.
     *
     * @return true if dangerous/criminal content detected
     */
    boolean isDangerousAndCriminalContent();

    /**
     * Check if content relates to health.
     *
     * @return true if health content detected
     */
    boolean isHealth();

    /**
     * Check if content relates to financial matters.
     *
     * @return true if financial content detected
     */
    boolean isFinancial();

    /**
     * Check if content relates to law.
     *
     * @return true if law content detected
     */
    boolean isLaw();

    /**
     * Check if content contains personally identifiable information.
     *
     * @return true if PII detected
     */
    boolean isPii();

    /**
     * Create a new categories builder.
     *
     * @return a new builder
     */
    static Builder builder();

    /**
     * Builder for constructing Categories instances.
     */
    public static final class Builder {
        Builder sexual(boolean sexual);
        Builder hate(boolean hate);
        Builder harassment(boolean harassment);
        Builder selfHarm(boolean selfHarm);
        Builder sexualMinors(boolean sexualMinors);
        Builder hateThreatening(boolean hateThreatening);
        Builder violenceGraphic(boolean violenceGraphic);
        Builder selfHarmIntent(boolean selfHarmIntent);
        Builder selfHarmInstructions(boolean selfHarmInstructions);
        Builder harassmentThreatening(boolean harassmentThreatening);
        Builder violence(boolean violence);
        Builder dangerousAndCriminalContent(boolean dangerousAndCriminalContent);
        Builder health(boolean health);
        Builder financial(boolean financial);
        Builder law(boolean law);
        Builder pii(boolean pii);
        Categories build();
    }
}

CategoryScores

Confidence scores for each moderation category (0.0 to 1.0).

public final class CategoryScores {
    /**
     * Get sexual content score.
     *
     * @return score from 0.0 (low confidence) to 1.0 (high confidence)
     */
    double getSexual();

    /**
     * Get hate speech score.
     *
     * @return score from 0.0 to 1.0
     */
    double getHate();

    /**
     * Get harassment score.
     *
     * @return score from 0.0 to 1.0
     */
    double getHarassment();

    /**
     * Get self-harm score.
     *
     * @return score from 0.0 to 1.0
     */
    double getSelfHarm();

    /**
     * Get sexual/minors content score.
     *
     * @return score from 0.0 to 1.0
     */
    double getSexualMinors();

    /**
     * Get hate/threatening score.
     *
     * @return score from 0.0 to 1.0
     */
    double getHateThreatening();

    /**
     * Get graphic violence score.
     *
     * @return score from 0.0 to 1.0
     */
    double getViolenceGraphic();

    /**
     * Get self-harm intent score.
     *
     * @return score from 0.0 to 1.0
     */
    double getSelfHarmIntent();

    /**
     * Get self-harm instructions score.
     *
     * @return score from 0.0 to 1.0
     */
    double getSelfHarmInstructions();

    /**
     * Get harassment/threatening score.
     *
     * @return score from 0.0 to 1.0
     */
    double getHarassmentThreatening();

    /**
     * Get violence score.
     *
     * @return score from 0.0 to 1.0
     */
    double getViolence();

    /**
     * Get dangerous and criminal content score.
     *
     * @return score from 0.0 to 1.0
     */
    double getDangerousAndCriminalContent();

    /**
     * Get health score.
     *
     * @return score from 0.0 to 1.0
     */
    double getHealth();

    /**
     * Get financial score.
     *
     * @return score from 0.0 to 1.0
     */
    double getFinancial();

    /**
     * Get law score.
     *
     * @return score from 0.0 to 1.0
     */
    double getLaw();

    /**
     * Get PII score.
     *
     * @return score from 0.0 to 1.0
     */
    double getPii();

    /**
     * Create a new category scores builder.
     *
     * @return a new builder
     */
    static Builder builder();

    /**
     * Builder for constructing CategoryScores instances.
     */
    public static final class Builder {
        Builder sexual(double sexual);
        Builder hate(double hate);
        Builder harassment(double harassment);
        Builder selfHarm(double selfHarm);
        Builder sexualMinors(double sexualMinors);
        Builder hateThreatening(double hateThreatening);
        Builder violenceGraphic(double violenceGraphic);
        Builder selfHarmIntent(double selfHarmIntent);
        Builder selfHarmInstructions(double selfHarmInstructions);
        Builder harassmentThreatening(double harassmentThreatening);
        Builder violence(double violence);
        Builder dangerousAndCriminalContent(double dangerousAndCriminalContent);
        Builder health(double health);
        Builder financial(double financial);
        Builder law(double law);
        Builder pii(double pii);
        CategoryScores build();
    }
}
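
Because each category has its own getter, aggregating scores means enumerating them explicitly; a sketch that takes the maximum across all sixteen categories:

import java.util.stream.DoubleStream;

// Highest score across all sixteen categories, for coarse risk ranking
double maxScore(CategoryScores s) {
    return DoubleStream.of(
            s.getSexual(), s.getHate(), s.getHarassment(), s.getSelfHarm(),
            s.getSexualMinors(), s.getHateThreatening(), s.getViolenceGraphic(),
            s.getSelfHarmIntent(), s.getSelfHarmInstructions(), s.getHarassmentThreatening(),
            s.getViolence(), s.getDangerousAndCriminalContent(), s.getHealth(),
            s.getFinancial(), s.getLaw(), s.getPii())
        .max().orElse(0.0);
}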

ModerationMessage

Message containing text to moderate.

public class ModerationMessage {
    /**
     * Construct a ModerationMessage with text.
     *
     * @param text the text to moderate
     */
    public ModerationMessage(String text);

    /**
     * Get the text content.
     *
     * @return the text
     */
    String getText();
}

ModerationOptions Interface

Options for configuring moderation.

public interface ModerationOptions extends ModelOptions {
    /**
     * Get the model name to use for moderation.
     *
     * @return the model name
     */
    String getModel();
}
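
Concrete options classes ship with each provider module; a minimal hand-rolled sketch (the class name and model name here are illustrative, not part of the API):

public class SimpleModerationOptions implements ModerationOptions {

    private final String model;

    public SimpleModerationOptions(String model) {
        this.model = model;
    }

    @Override
    public String getModel() {
        return this.model;
    }
}

// Usage with the options-taking ModerationPrompt constructor
ModerationPrompt prompt = new ModerationPrompt(
    new ModerationMessage("text to check"),
    new SimpleModerationOptions("my-moderation-model"));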

ModerationGenerationMetadata Interface

Metadata for moderation generation.

public interface ModerationGenerationMetadata extends ResultMetadata {
    // Generation-specific metadata
}

ModerationResponseMetadata

Metadata for moderation responses.

public class ModerationResponseMetadata extends AbstractResponseMetadata {
    // Response-level metadata
}

Usage Examples

Simple Content Moderation

import org.springframework.ai.moderation.Moderation;
import org.springframework.ai.moderation.ModerationModel;
import org.springframework.ai.moderation.ModerationPrompt;
import org.springframework.ai.moderation.ModerationResponse;
import org.springframework.ai.moderation.ModerationResult;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

@Service
public class ModerationService {
    @Autowired
    private ModerationModel moderationModel;

    public boolean isContentSafe(String content) {
        ModerationPrompt prompt = new ModerationPrompt(content);
        ModerationResponse response = moderationModel.call(prompt);
        Moderation moderation = response.getResult().getOutput();

        // Returns false if any result is flagged
        return moderation.getResults().stream()
            .noneMatch(ModerationResult::isFlagged);
    }
}

Detailed Moderation Check

ModerationPrompt prompt = new ModerationPrompt("Content to check");
ModerationResponse response = moderationModel.call(prompt);
Moderation moderation = response.getResult().getOutput();
ModerationResult result = moderation.getResults().get(0);

// Check if flagged
if (result.isFlagged()) {
    System.out.println("Content flagged!");

    // Check specific categories
    Categories categories = result.getCategories();

    if (categories.isHate()) {
        System.out.println("Contains hate speech");
    }
    if (categories.isViolence()) {
        System.out.println("Contains violence");
    }
    if (categories.isSexual()) {
        System.out.println("Contains sexual content");
    }
    if (categories.isSelfHarm()) {
        System.out.println("Contains self-harm content");
    }
    if (categories.isHarassment()) {
        System.out.println("Contains harassment");
    }
}

Checking Category Scores

ModerationPrompt prompt = new ModerationPrompt("Content to analyze");
ModerationResponse response = moderationModel.call(prompt);
Moderation moderation = response.getResult().getOutput();

// Get confidence scores from the first result
CategoryScores scores = moderation.getResults().get(0).getCategoryScores();

System.out.println("Moderation Scores:");
System.out.println("Hate: " + scores.getHate());
System.out.println("Violence: " + scores.getViolence());
System.out.println("Sexual: " + scores.getSexual());
System.out.println("Self-harm: " + scores.getSelfHarm());
System.out.println("Harassment: " + scores.getHarassment());

// Use custom threshold
double threshold = 0.5;
if (scores.getHate() > threshold) {
    System.out.println("High confidence hate speech detected");
}

Custom Thresholds

@Service
public class CustomModerationService {
    private final ModerationModel moderationModel;
    private static final double HIGH_RISK_THRESHOLD = 0.8;
    private static final double MEDIUM_RISK_THRESHOLD = 0.5;

    public CustomModerationService(ModerationModel moderationModel) {
        this.moderationModel = moderationModel;
    }

    public ModerationDecision moderateContent(String content) {
        ModerationPrompt prompt = new ModerationPrompt(content);
        ModerationResponse response = moderationModel.call(prompt);
        Moderation moderation = response.getResult().getOutput();

        CategoryScores scores = moderation.getResults().get(0).getCategoryScores();

        // Determine risk level
        double maxScore = getMaxScore(scores);

        if (maxScore >= HIGH_RISK_THRESHOLD) {
            return new ModerationDecision("BLOCKED", "High risk content");
        } else if (maxScore >= MEDIUM_RISK_THRESHOLD) {
            return new ModerationDecision("REVIEW", "Requires review");
        } else {
            return new ModerationDecision("ALLOWED", "Content is safe");
        }
    }

    private double getMaxScore(CategoryScores scores) {
        return Math.max(
            Math.max(scores.getHate(), scores.getViolence()),
            Math.max(scores.getSexual(),
                Math.max(scores.getSelfHarm(), scores.getHarassment())
            )
        );
    }

    // Named ModerationDecision to avoid clashing with the API's ModerationResult
    record ModerationDecision(String action, String reason) {}
}

Batch Moderation

public List<TextModerationResult> moderateMultipleTexts(List<String> texts) {
    List<TextModerationResult> results = new ArrayList<>();

    for (String text : texts) {
        ModerationPrompt prompt = new ModerationPrompt(text);
        ModerationResponse response = moderationModel.call(prompt);
        ModerationResult result = response.getResult().getOutput().getResults().get(0);

        results.add(new TextModerationResult(
            text,
            result.isFlagged(),
            result.getCategories()
        ));
    }

    return results;
}

// Named TextModerationResult to avoid clashing with the API's ModerationResult
record TextModerationResult(String text, boolean flagged, Categories categories) {}

Chat Message Moderation

@Service
public class ChatModerationService {
    private final ModerationModel moderationModel;

    public ChatModerationService(ModerationModel moderationModel) {
        this.moderationModel = moderationModel;
    }

    public boolean canSendMessage(String message) {
        ModerationPrompt prompt = new ModerationPrompt(message);
        ModerationResponse response = moderationModel.call(prompt);
        ModerationResult result = response.getResult().getOutput().getResults().get(0);

        // Block flagged content
        if (result.isFlagged()) {
            Categories categories = result.getCategories();

            // Log which categories were flagged
            logViolations(categories);

            return false;
        }

        return true;
    }

    private void logViolations(Categories categories) {
        List<String> violations = new ArrayList<>();

        if (categories.isHate()) violations.add("hate");
        if (categories.isViolence()) violations.add("violence");
        if (categories.isSexual()) violations.add("sexual");
        if (categories.isSelfHarm()) violations.add("self-harm");
        if (categories.isHarassment()) violations.add("harassment");

        System.out.println("Content blocked for: " + String.join(", ", violations));
    }
}

REST API Example

@RestController
@RequestMapping("/api/moderation")
public class ModerationController {
    private final ModerationModel moderationModel;

    public ModerationController(ModerationModel moderationModel) {
        this.moderationModel = moderationModel;
    }

    @PostMapping("/check")
    public ModerationCheckResult checkContent(@RequestBody ModerationCheckRequest request) {
        ModerationPrompt prompt = new ModerationPrompt(request.text());
        ModerationResponse response = moderationModel.call(prompt);
        ModerationResult result = response.getResult().getOutput().getResults().get(0);

        return new ModerationCheckResult(
            result.isFlagged(),
            convertCategories(result.getCategories()),
            convertScores(result.getCategoryScores())
        );
    }

    private Map<String, Boolean> convertCategories(Categories categories) {
        return Map.of(
            "hate", categories.isHate(),
            "violence", categories.isViolence(),
            "sexual", categories.isSexual(),
            "selfHarm", categories.isSelfHarm(),
            "harassment", categories.isHarassment()
        );
    }

    private Map<String, Double> convertScores(CategoryScores scores) {
        return Map.of(
            "hate", scores.getHate(),
            "violence", scores.getViolence(),
            "sexual", scores.getSexual(),
            "selfHarm", scores.getSelfHarm(),
            "harassment", scores.getHarassment()
        );
    }

    record ModerationCheckRequest(String text) {}

    record ModerationCheckResult(
        boolean flagged,
        Map<String, Boolean> categories,
        Map<String, Double> scores
    ) {}
}

Specific Category Checks

public class CategoryChecker {
    private final ModerationModel moderationModel;

    public CategoryChecker(ModerationModel moderationModel) {
        this.moderationModel = moderationModel;
    }

    public boolean containsHateSpeech(String text) {
        ModerationResult mod = moderate(text);
        return mod.getCategories().isHate();
    }

    public boolean containsViolence(String text) {
        ModerationResult mod = moderate(text);
        return mod.getCategories().isViolence();
    }

    public boolean containsSexualContent(String text) {
        ModerationResult mod = moderate(text);
        return mod.getCategories().isSexual();
    }

    public boolean containsSelfHarm(String text) {
        ModerationResult mod = moderate(text);
        return mod.getCategories().isSelfHarm();
    }

    private ModerationResult moderate(String text) {
        ModerationPrompt prompt = new ModerationPrompt(text);
        ModerationResponse response = moderationModel.call(prompt);
        return response.getResult().getOutput().getResults().get(0);
    }
}

Moderation Report

public class ModerationReport {
    private final ModerationModel moderationModel;

    public ModerationReport(ModerationModel moderationModel) {
        this.moderationModel = moderationModel;
    }

    public String generateReport(String content) {
        ModerationPrompt prompt = new ModerationPrompt(content);
        ModerationResponse response = moderationModel.call(prompt);
        Moderation moderation = response.getResult().getOutput();
        ModerationResult result = moderation.getResults().get(0);

        StringBuilder report = new StringBuilder();
        report.append("=== MODERATION REPORT ===\n");
        report.append("Model: ").append(moderation.getModel()).append("\n");
        report.append("Flagged: ").append(result.isFlagged()).append("\n\n");

        report.append("Categories:\n");
        Categories cats = result.getCategories();
        report.append("  Hate: ").append(cats.isHate()).append("\n");
        report.append("  Violence: ").append(cats.isViolence()).append("\n");
        report.append("  Sexual: ").append(cats.isSexual()).append("\n");
        report.append("  Self-harm: ").append(cats.isSelfHarm()).append("\n");
        report.append("  Harassment: ").append(cats.isHarassment()).append("\n\n");

        report.append("Scores:\n");
        CategoryScores scores = result.getCategoryScores();
        report.append(String.format("  Hate: %.3f\n", scores.getHate()));
        report.append(String.format("  Violence: %.3f\n", scores.getViolence()));
        report.append(String.format("  Sexual: %.3f\n", scores.getSexual()));
        report.append(String.format("  Self-harm: %.3f\n", scores.getSelfHarm()));
        report.append(String.format("  Harassment: %.3f\n", scores.getHarassment()));

        return report.toString();
    }
}

Error Handling

public ModerationOutcome safeModerate(String content) {
    try {
        ModerationPrompt prompt = new ModerationPrompt(content);
        ModerationResponse response = moderationModel.call(prompt);
        ModerationResult result = response.getResult().getOutput().getResults().get(0);

        return new ModerationOutcome(
            true,
            result.isFlagged(),
            result.getCategories()
        );
    } catch (Exception e) {
        System.err.println("Moderation failed: " + e.getMessage());

        // Fail closed: treat content as flagged when moderation is unavailable
        return new ModerationOutcome(false, true, null);
    }
}

// Named ModerationOutcome to avoid clashing with the API's ModerationResult
record ModerationOutcome(boolean success, boolean flagged, Categories categories) {}

Install with Tessl CLI

npx tessl i tessl/maven-org-springframework-ai--spring-ai-model@1.1.1
