
Moderation Model

The OpenAiModerationModel checks content for policy violations and detects harmful content across multiple categories.

Import

import org.springframework.ai.openai.OpenAiModerationModel;
import org.springframework.ai.openai.OpenAiModerationOptions;
import org.springframework.ai.model.moderation.ModerationPrompt;
import org.springframework.ai.model.moderation.ModerationResponse;
import org.springframework.ai.model.moderation.Moderation;
import org.springframework.ai.model.moderation.ModerationResult;
import org.springframework.ai.model.moderation.ModerationMessage;
import org.springframework.ai.model.moderation.Categories;
import org.springframework.ai.model.moderation.CategoryScores;

API

OpenAiModerationModel

package org.springframework.ai.openai;

public class OpenAiModerationModel implements ModerationModel {
    // Check content for moderation issues
    public ModerationResponse call(ModerationPrompt moderationPrompt);

    // Get default options
    public OpenAiModerationOptions getDefaultOptions();

    // Create with default options
    public static OpenAiModerationModel withDefaultOptions(OpenAiModerationOptions options);
}

OpenAiModerationOptions

package org.springframework.ai.openai;

public class OpenAiModerationOptions implements ModerationOptions {
    // Builder methods
    public static OpenAiModerationOptions.Builder builder();

    // Getters
    public String getModel();

    // Setters
    public void setModel(String model);
}

Moderation

package org.springframework.ai.model.moderation;

public class Moderation {
    public String getId();
    public String getModel();
    public List<ModerationResult> getResults();
}

ModerationResult

package org.springframework.ai.model.moderation;

public class ModerationResult {
    public boolean isFlagged();
    public Categories getCategories();
    public CategoryScores getCategoryScores();
}

Categories

package org.springframework.ai.model.moderation;

public class Categories {
    public boolean isSexual();
    public boolean isHate();
    public boolean isHarassment();
    public boolean isSelfHarm();
    public boolean isViolence();
    public boolean isViolenceGraphic();
    public boolean isSexualMinors();
    public boolean isHateThreatening();
    public boolean isHarassmentThreatening();
    public boolean isSelfHarmIntent();
    public boolean isSelfHarmInstructions();
    public boolean isDangerousAndCriminalContent();
    public boolean isHealth();
    public boolean isFinancial();
    public boolean isLaw();
    public boolean isPii();
}

CategoryScores

package org.springframework.ai.model.moderation;

public class CategoryScores {
    public double getSexual();
    public double getHate();
    public double getHarassment();
    public double getSelfHarm();
    public double getViolence();
    public double getViolenceGraphic();
    public double getSexualMinors();
    public double getHateThreatening();
    public double getHarassmentThreatening();
    public double getSelfHarmIntent();
    public double getSelfHarmInstructions();
    public double getDangerousAndCriminalContent();
    public double getHealth();
    public double getFinancial();
    public double getLaw();
    public double getPii();
}

Usage Examples

Basic Content Moderation

import org.springframework.ai.openai.OpenAiModerationModel;
import org.springframework.ai.model.moderation.ModerationPrompt;
import org.springframework.ai.model.moderation.ModerationResponse;
import org.springframework.ai.model.moderation.Moderation;
import org.springframework.ai.model.moderation.ModerationResult;
import org.springframework.stereotype.Service;

@Service
public class ContentModerationService {

    private final OpenAiModerationModel moderationModel;

    public ContentModerationService(OpenAiModerationModel moderationModel) {
        this.moderationModel = moderationModel;
    }

    public boolean isContentSafe(String content) {
        ModerationPrompt prompt = new ModerationPrompt(content);
        ModerationResponse response = moderationModel.call(prompt);
        Moderation moderation = response.getResult().getOutput();

        for (ModerationResult result : moderation.getResults()) {
            if (result.isFlagged()) {
                return false;
            }
        }
        return true;
    }
}

Detailed Moderation Analysis

import org.springframework.ai.model.moderation.Categories;
import org.springframework.ai.model.moderation.CategoryScores;
import java.util.Map;
import java.util.HashMap;

public Map<String, Object> analyzeContent(String content) {
    ModerationPrompt prompt = new ModerationPrompt(content);
    ModerationResponse response = moderationModel.call(prompt);
    Moderation moderation = response.getResult().getOutput();

    Map<String, Object> analysis = new HashMap<>();

    for (ModerationResult result : moderation.getResults()) {
        analysis.put("flagged", result.isFlagged());

        Categories categories = result.getCategories();
        Map<String, Boolean> categoryMap = new HashMap<>();
        categoryMap.put("sexual", categories.isSexual());
        categoryMap.put("hate", categories.isHate());
        categoryMap.put("harassment", categories.isHarassment());
        categoryMap.put("self-harm", categories.isSelfHarm());
        categoryMap.put("violence", categories.isViolence());
        categoryMap.put("violence/graphic", categories.isViolenceGraphic());
        analysis.put("categories", categoryMap);

        CategoryScores scores = result.getCategoryScores();
        Map<String, Double> scoresMap = new HashMap<>();
        scoresMap.put("sexual", scores.getSexual());
        scoresMap.put("hate", scores.getHate());
        scoresMap.put("harassment", scores.getHarassment());
        scoresMap.put("self-harm", scores.getSelfHarm());
        scoresMap.put("violence", scores.getViolence());
        scoresMap.put("violence/graphic", scores.getViolenceGraphic());
        analysis.put("categoryScores", scoresMap);
    }

    return analysis;
}

Check Multiple Texts

import java.util.List;
import java.util.stream.Collectors;

public List<Boolean> moderateMultiple(List<String> texts) {
    return texts.stream()
        .map(text -> {
            ModerationPrompt prompt = new ModerationPrompt(text);
            ModerationResponse response = moderationModel.call(prompt);
            Moderation moderation = response.getResult().getOutput();

            for (ModerationResult result : moderation.getResults()) {
                if (result.isFlagged()) {
                    return false;
                }
            }
            return true;
        })
        .collect(Collectors.toList());
}

Category-Specific Checks

public boolean hasViolence(String content) {
    ModerationPrompt prompt = new ModerationPrompt(content);
    ModerationResponse response = moderationModel.call(prompt);
    Moderation moderation = response.getResult().getOutput();

    for (ModerationResult result : moderation.getResults()) {
        Categories categories = result.getCategories();
        if (categories.isViolence()) {
            return true;
        }
    }
    return false;
}

public boolean hasHate(String content) {
    ModerationPrompt prompt = new ModerationPrompt(content);
    ModerationResponse response = moderationModel.call(prompt);
    Moderation moderation = response.getResult().getOutput();

    for (ModerationResult result : moderation.getResults()) {
        Categories categories = result.getCategories();
        if (categories.isHate()) {
            return true;
        }
    }
    return false;
}

public boolean hasSexual(String content) {
    ModerationPrompt prompt = new ModerationPrompt(content);
    ModerationResponse response = moderationModel.call(prompt);
    Moderation moderation = response.getResult().getOutput();

    for (ModerationResult result : moderation.getResults()) {
        Categories categories = result.getCategories();
        if (categories.isSexual()) {
            return true;
        }
    }
    return false;
}

Threshold-Based Moderation

public boolean isContentSafeWithThreshold(String content, double threshold) {
    ModerationPrompt prompt = new ModerationPrompt(content);
    ModerationResponse response = moderationModel.call(prompt);
    Moderation moderation = response.getResult().getOutput();

    for (ModerationResult result : moderation.getResults()) {
        CategoryScores scores = result.getCategoryScores();

        if (scores.getSexual() >= threshold ||
            scores.getHate() >= threshold ||
            scores.getHarassment() >= threshold ||
            scores.getSelfHarm() >= threshold ||
            scores.getViolence() >= threshold ||
            scores.getViolenceGraphic() >= threshold ||
            scores.getSexualMinors() >= threshold ||
            scores.getHateThreatening() >= threshold ||
            scores.getHarassmentThreatening() >= threshold ||
            scores.getSelfHarmIntent() >= threshold ||
            scores.getSelfHarmInstructions() >= threshold) {
            return false;
        }
    }
    return true;
}

// Example: Flag content if any category exceeds 0.7
public boolean isSafe(String content) {
    return isContentSafeWithThreshold(content, 0.7);
}

Detailed Category Analysis

public void printModerationDetails(String content) {
    ModerationPrompt prompt = new ModerationPrompt(content);
    ModerationResponse response = moderationModel.call(prompt);
    Moderation moderation = response.getResult().getOutput();

    System.out.println("Model: " + moderation.getModel());
    System.out.println("ID: " + moderation.getId());

    for (ModerationResult result : moderation.getResults()) {
        System.out.println("\nContent flagged: " + result.isFlagged());

        Categories categories = result.getCategories();
        System.out.println("\nCategory Flags:");
        System.out.println("  sexual: " + categories.isSexual());
        System.out.println("  hate: " + categories.isHate());
        System.out.println("  harassment: " + categories.isHarassment());
        System.out.println("  self-harm: " + categories.isSelfHarm());
        System.out.println("  violence: " + categories.isViolence());
        System.out.println("  violence/graphic: " + categories.isViolenceGraphic());
        System.out.println("  sexual/minors: " + categories.isSexualMinors());
        System.out.println("  hate/threatening: " + categories.isHateThreatening());
        System.out.println("  harassment/threatening: " + categories.isHarassmentThreatening());
        System.out.println("  self-harm/intent: " + categories.isSelfHarmIntent());
        System.out.println("  self-harm/instructions: " + categories.isSelfHarmInstructions());

        CategoryScores scores = result.getCategoryScores();
        System.out.println("\nCategory Scores:");
        System.out.println("  sexual: " + String.format("%.4f", scores.getSexual()));
        System.out.println("  hate: " + String.format("%.4f", scores.getHate()));
        System.out.println("  harassment: " + String.format("%.4f", scores.getHarassment()));
        System.out.println("  self-harm: " + String.format("%.4f", scores.getSelfHarm()));
        System.out.println("  violence: " + String.format("%.4f", scores.getViolence()));
        System.out.println("  violence/graphic: " + String.format("%.4f", scores.getViolenceGraphic()));
        System.out.println("  sexual/minors: " + String.format("%.4f", scores.getSexualMinors()));
        System.out.println("  hate/threatening: " + String.format("%.4f", scores.getHateThreatening()));
        System.out.println("  harassment/threatening: " + String.format("%.4f", scores.getHarassmentThreatening()));
        System.out.println("  self-harm/intent: " + String.format("%.4f", scores.getSelfHarmIntent()));
        System.out.println("  self-harm/instructions: " + String.format("%.4f", scores.getSelfHarmInstructions()));
    }
}

Moderation with Options

import org.springframework.ai.openai.OpenAiModerationOptions;

public boolean moderateWithOptions(String content) {
    OpenAiModerationOptions options = OpenAiModerationOptions.builder()
        .model("text-moderation-latest")
        .build();

    ModerationPrompt prompt = new ModerationPrompt(content, options);
    ModerationResponse response = moderationModel.call(prompt);
    Moderation moderation = response.getResult().getOutput();

    for (ModerationResult result : moderation.getResults()) {
        if (result.isFlagged()) {
            return false;
        }
    }
    return true;
}

Content Filter Implementation

import java.util.List;
import java.util.ArrayList;

public String filterContent(String content) {
    ModerationPrompt prompt = new ModerationPrompt(content);
    ModerationResponse response = moderationModel.call(prompt);
    Moderation moderation = response.getResult().getOutput();

    for (ModerationResult result : moderation.getResults()) {
        if (result.isFlagged()) {
            List<String> flaggedCategories = new ArrayList<>();
            Categories categories = result.getCategories();

            if (categories.isSexual()) flaggedCategories.add("sexual");
            if (categories.isHate()) flaggedCategories.add("hate");
            if (categories.isHarassment()) flaggedCategories.add("harassment");
            if (categories.isSelfHarm()) flaggedCategories.add("self-harm");
            if (categories.isViolence()) flaggedCategories.add("violence");
            if (categories.isViolenceGraphic()) flaggedCategories.add("violence/graphic");
            if (categories.isSexualMinors()) flaggedCategories.add("sexual/minors");
            if (categories.isHateThreatening()) flaggedCategories.add("hate/threatening");
            if (categories.isHarassmentThreatening()) flaggedCategories.add("harassment/threatening");
            if (categories.isSelfHarmIntent()) flaggedCategories.add("self-harm/intent");
            if (categories.isSelfHarmInstructions()) flaggedCategories.add("self-harm/instructions");

            return "Content blocked due to: " + String.join(", ", flaggedCategories);
        }
    }

    return content;
}

Batch Processing

import java.util.Map;
import java.util.HashMap;

public Map<String, Boolean> moderateBatch(List<String> contents) {
    Map<String, Boolean> results = new HashMap<>();

    for (String content : contents) {
        ModerationPrompt prompt = new ModerationPrompt(content);
        ModerationResponse response = moderationModel.call(prompt);
        Moderation moderation = response.getResult().getOutput();

        boolean safe = true;
        for (ModerationResult result : moderation.getResults()) {
            if (result.isFlagged()) {
                safe = false;
                break;
            }
        }
        results.put(content, safe);
    }

    return results;
}

Configuration

Configure via application.properties:

# API Connection
spring.ai.openai.moderation.api-key=sk-...
spring.ai.openai.moderation.base-url=https://api.openai.com
spring.ai.openai.moderation.project-id=proj_...
spring.ai.openai.moderation.organization-id=org-...

# Model Configuration (optional)
spring.ai.openai.moderation.options.model=omni-moderation-latest

Model Options

Available Models

  • omni-moderation-latest - Latest multimodal moderation model (default, automatically updated)
  • text-moderation-latest - Latest text-only moderation model
  • text-moderation-stable - Stable text-only version, updated less frequently

Moderation Categories

The moderation model checks for the following categories:

Primary Categories:

  • hate - Content promoting hate based on identity
  • hate/threatening - Hateful content with violence or threats
  • harassment - Content intended to harass, threaten, or bully
  • harassment/threatening - Harassment with threats
  • self-harm - Content promoting self-harm
  • self-harm/intent - Content expressing intent for self-harm
  • self-harm/instructions - Instructions for self-harm
  • sexual - Sexual content intended to arouse
  • sexual/minors - Sexual content involving minors
  • violence - Content depicting violence
  • violence/graphic - Graphic violent content

Additional Categories (omni-moderation models):

  • dangerous-and-criminal-content - Content related to dangerous or criminal activities
  • health - Health-related content that may require professional advice
  • financial - Financial advice or content requiring professional guidance
  • law - Legal content or advice
  • pii - Personally Identifiable Information

Each category exposes two signals, which can be combined as shown in the sketch below:

  • Boolean flag: whether the content violates this category
  • Confidence score: a value from 0.0 to 1.0 indicating the likelihood of a violation
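
A policy might, for example, treat a category as violated only when both its flag is set and its score passes an application-specific cutoff. A minimal sketch, reusing the injected moderationModel from the earlier examples (the cutoff value and the focus on the violence category are illustrative choices, not part of the API):

public boolean isViolentWithConfidence(String content, double cutoff) {
    ModerationPrompt prompt = new ModerationPrompt(content);
    ModerationResponse response = moderationModel.call(prompt);
    Moderation moderation = response.getResult().getOutput();

    for (ModerationResult result : moderation.getResults()) {
        Categories categories = result.getCategories();
        CategoryScores scores = result.getCategoryScores();

        // Require both the boolean flag and a sufficiently high confidence score
        if (categories.isViolence() && scores.getViolence() >= cutoff) {
            return true;
        }
    }
    return false;
}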

Response Structure

ModerationResponse

package org.springframework.ai.model.moderation;

public class ModerationResponse {
    public Generation getResult();
    public List<Generation> getResults();
    public ModerationResponseMetadata getMetadata();
}

The Generation object contains the Moderation output via getOutput().
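
A compact sketch of navigating the response, unwrapping the Moderation from the first Generation as the examples above do:

public void inspectResponse(String content) {
    ModerationResponse response = moderationModel.call(new ModerationPrompt(content));

    // Unwrap the Moderation carried by the first Generation
    Moderation moderation = response.getResult().getOutput();

    for (ModerationResult result : moderation.getResults()) {
        System.out.println("flagged: " + result.isFlagged());
    }
}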

Error Handling

import org.springframework.ai.openai.api.OpenAiApiException;

public boolean safeModerate(String content) {
    try {
        ModerationPrompt prompt = new ModerationPrompt(content);
        ModerationResponse response = moderationModel.call(prompt);
        Moderation moderation = response.getResult().getOutput();

        for (ModerationResult result : moderation.getResults()) {
            if (result.isFlagged()) {
                return false;
            }
        }
        return true;
    } catch (OpenAiApiException e) {
        // Handle API errors (rate limits, invalid API key, etc.)
        // In case of error, default to flagging content as unsafe
        return false;
    }
}

Types

OpenAiModerationOptions.Builder

package org.springframework.ai.openai;

public static class Builder {
    public Builder model(String model);
    public OpenAiModerationOptions build();
}

ModerationPrompt

package org.springframework.ai.model.moderation;

public class ModerationPrompt {
    public ModerationPrompt(String text);
    public ModerationPrompt(String text, ModerationOptions options);
    public ModerationPrompt(ModerationMessage message);
    public ModerationPrompt(ModerationMessage message, ModerationOptions options);
    public ModerationPrompt(List<ModerationMessage> messages);
    public ModerationPrompt(List<ModerationMessage> messages, ModerationOptions options);

    public ModerationMessage getInstructions();
    public ModerationOptions getOptions();
}

ModerationMessage

package org.springframework.ai.model.moderation;

public class ModerationMessage {
    public ModerationMessage(String content);

    public String getContent();
}
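
The message-based constructors are interchangeable with the String-based one. A short sketch building the same prompt in different ways and passing explicit options (the model name is shown only for illustration):

import java.util.List;
import org.springframework.ai.model.moderation.ModerationMessage;
import org.springframework.ai.model.moderation.ModerationPrompt;
import org.springframework.ai.openai.OpenAiModerationOptions;

public void buildPrompts(String text) {
    OpenAiModerationOptions options = OpenAiModerationOptions.builder()
        .model("omni-moderation-latest")
        .build();

    // Equivalent ways to construct a prompt from the constructors listed above
    ModerationPrompt fromText = new ModerationPrompt(text, options);
    ModerationPrompt fromMessage = new ModerationPrompt(new ModerationMessage(text), options);
    ModerationPrompt fromMessages = new ModerationPrompt(
        List.of(new ModerationMessage(text), new ModerationMessage("a second message")), options);

    ModerationResponse response = moderationModel.call(fromMessages);
}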

Best Practices

  1. Use for user-generated content: Always moderate content before displaying it publicly
  2. Check before AI generation: Moderate prompts before sending them to generation models
  3. Combine with application logic: Use category scores for nuanced filtering
  4. Handle errors conservatively: Default to blocking content if moderation fails
  5. Log violations: Track flagged content for auditing and improvement (see the sketch after this list)
  6. Set appropriate thresholds: Adjust sensitivity to your application's needs
  7. Respect user privacy: Only moderate content when necessary for safety
  8. Iterate through results: Always process every ModerationResult in the results list
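
A minimal sketch of practice 5, reusing the ContentModerationService from the basic example; the SLF4J logger and the chosen log fields are illustrative, not prescribed by the API:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

private static final Logger log = LoggerFactory.getLogger(ContentModerationService.class);

public boolean moderateAndLog(String content) {
    ModerationPrompt prompt = new ModerationPrompt(content);
    ModerationResponse response = moderationModel.call(prompt);
    Moderation moderation = response.getResult().getOutput();

    for (ModerationResult result : moderation.getResults()) {
        if (result.isFlagged()) {
            Categories categories = result.getCategories();
            // Log category names only, not the content itself, to keep sensitive data out of logs
            log.warn("Content flagged (id={}): violence={}, hate={}, harassment={}, sexual={}, selfHarm={}",
                    moderation.getId(), categories.isViolence(), categories.isHate(),
                    categories.isHarassment(), categories.isSexual(), categories.isSelfHarm());
            return false;
        }
    }
    return true;
}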

Use Cases

  • User input validation: Check forum posts, comments, chat messages
  • Content generation safety: Validate prompts before sending to AI models
  • Content curation: Filter user-submitted content in galleries or feeds
  • Compliance: Ensure content meets platform policies and legal requirements
  • Automated moderation: First-line defense before human review