Core model interfaces and abstractions for the Spring AI framework, providing a portable API for chat, embeddings, images, audio, and tool calling across multiple AI providers.

Content safety and moderation capabilities for detecting harmful content, including hate speech, violence, sexual content, self-harm, and other policy violations, with category-specific flags and scores.
Main interface for content moderation.
public interface ModerationModel extends Model<ModerationPrompt, ModerationResponse> {
/**
* Check content for policy violations.
*
* @param request the moderation prompt
* @return the moderation response with flags and scores
*/
ModerationResponse call(ModerationPrompt request);
}
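
A minimal sketch of invoking the interface; the moderationModel variable stands in for any configured ModerationModel implementation:

// moderationModel is assumed to be an injected ModerationModel implementation
ModerationPrompt prompt = new ModerationPrompt("Text to screen");
ModerationResponse response = moderationModel.call(prompt);
Moderation moderation = response.getResult().getOutput();

Request for content moderation.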
public class ModerationPrompt implements ModelRequest<ModerationMessage> {
/**
* Construct a ModerationPrompt from text.
*
* @param text the text to moderate
*/
public ModerationPrompt(String text);
/**
* Construct a ModerationPrompt from a message.
*
* @param message the moderation message
*/
public ModerationPrompt(ModerationMessage message);
/**
* Construct a ModerationPrompt with message and options.
*
* @param message the moderation message
* @param options the moderation options
*/
public ModerationPrompt(ModerationMessage message, ModerationOptions options);
/**
* Get the moderation message.
*
* @return the message
*/
ModerationMessage getInstructions();
/**
* Get the moderation options.
*
* @return the options
*/
ModerationOptions getOptions();
}
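
A brief sketch of the three ways to construct a prompt; the options variable is an illustrative assumption standing in for a provider-supplied ModerationOptions:

// From raw text
ModerationPrompt fromText = new ModerationPrompt("Text to screen");
// From a message
ModerationPrompt fromMessage = new ModerationPrompt(new ModerationMessage("Text to screen"));
// From a message plus options ("options" is assumed to exist)
ModerationPrompt withOptions = new ModerationPrompt(new ModerationMessage("Text to screen"), options);

Response containing moderation results.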
public class ModerationResponse implements ModelResponse<Generation> {
/**
* Construct a ModerationResponse with a generation.
*
* @param generation the moderation generation
*/
public ModerationResponse(Generation generation);
/**
* Construct a ModerationResponse with multiple generations.
*
* @param generations the list of moderation generations
*/
public ModerationResponse(List<Generation> generations);
/**
* Construct a ModerationResponse with generations and metadata.
*
* @param generations the list of moderation generations
* @param metadata the response metadata
*/
public ModerationResponse(
List<Generation> generations,
ModerationResponseMetadata metadata
);
/**
* Get the first moderation result.
*
* @return the first generation
*/
Generation getResult();
/**
* Get all moderation results.
*
* @return list of generations
*/
List<Generation> getResults();
/**
* Get response metadata.
*
* @return the moderation response metadata
*/
ModerationResponseMetadata getMetadata();
}

Single moderation result.
public class Generation implements ModelResult<Moderation> {
/**
* Construct a Generation with moderation.
*
* @param moderation the moderation result
*/
public Generation(Moderation moderation);
/**
* Construct a Generation with moderation and metadata.
*
* @param moderation the moderation result
* @param metadata the generation metadata
*/
public Generation(Moderation moderation, ModerationGenerationMetadata metadata);
/**
* Get the moderation result.
*
* @return the moderation
*/
Moderation getOutput();
/**
* Get generation metadata.
*
* @return the moderation generation metadata
*/
ModerationGenerationMetadata getMetadata();
}

Detailed moderation result with flags and scores.
public final class Moderation {
/**
* Get the result ID.
*
* @return the ID
*/
String getId();
/**
* Get the model name used for moderation.
*
* @return the model name
*/
String getModel();
/**
* Get the list of moderation results.
*
* @return list of moderation results
*/
List<ModerationResult> getResults();
/**
* Create a new moderation builder.
*
* @return a new builder
*/
static Builder builder();
/**
* Builder for constructing Moderation instances.
*/
public static final class Builder {
/**
* Set the moderation ID.
*
* @param id the ID
* @return this builder
*/
Builder id(String id);
/**
* Set the model name.
*
* @param model the model name
* @return this builder
*/
Builder model(String model);
/**
* Set the moderation results.
*
* @param results the list of moderation results
* @return this builder
*/
Builder results(List<ModerationResult> results);
/**
* Build the Moderation instance.
*
* @return the moderation
*/
Moderation build();
}
}

Individual moderation result indicating whether content was flagged, with detailed categories and scores.
public final class ModerationResult {
/**
* Check if content was flagged as violating moderation policies.
*
* @return true if content was flagged
*/
boolean isFlagged();
/**
* Set whether content was flagged.
*
* @param flagged true if flagged
*/
void setFlagged(boolean flagged);
/**
* Get the categories of moderation violations.
*
* @return the categories
*/
Categories getCategories();
/**
* Set the categories of moderation violations.
*
* @param categories the categories
*/
void setCategories(Categories categories);
/**
* Get the detailed scores for each category.
*
* @return the category scores
*/
CategoryScores getCategoryScores();
/**
* Set the detailed scores for each category.
*
* @param categoryScores the category scores
*/
void setCategoryScores(CategoryScores categoryScores);
/**
* Create a new moderation result builder.
*
* @return a new builder
*/
static Builder builder();
/**
* Builder for constructing ModerationResult instances.
*/
public static final class Builder {
/**
* Set whether content was flagged.
*
* @param flagged true if flagged
* @return this builder
*/
Builder flagged(boolean flagged);
/**
* Set the categories.
*
* @param categories the categories
* @return this builder
*/
Builder categories(Categories categories);
/**
* Set the category scores.
*
* @param categoryScores the category scores
* @return this builder
*/
Builder categoryScores(CategoryScores categoryScores);
/**
* Build the ModerationResult instance.
*
* @return the moderation result
*/
ModerationResult build();
}
}

Boolean flags for each moderation category.
public final class Categories {
/**
* Check if content is sexual in nature.
*
* @return true if sexual content detected
*/
boolean isSexual();
/**
* Check if content contains hate speech.
*
* @return true if hate speech detected
*/
boolean isHate();
/**
* Check if content contains harassment.
*
* @return true if harassment detected
*/
boolean isHarassment();
/**
* Check if content relates to self-harm.
*
* @return true if self-harm content detected
*/
boolean isSelfHarm();
/**
* Check if content contains sexual content involving minors.
*
* @return true if sexual/minors content detected
*/
boolean isSexualMinors();
/**
* Check if content contains hate speech with threatening language.
*
* @return true if hate/threatening detected
*/
boolean isHateThreatening();
/**
* Check if content contains graphic violence.
*
* @return true if graphic violence detected
*/
boolean isViolenceGraphic();
/**
* Check if content expresses intent to self-harm.
*
* @return true if self-harm intent detected
*/
boolean isSelfHarmIntent();
/**
* Check if content contains instructions for self-harm.
*
* @return true if self-harm instructions detected
*/
boolean isSelfHarmInstructions();
/**
* Check if content contains threatening harassment.
*
* @return true if harassment/threatening detected
*/
boolean isHarassmentThreatening();
/**
* Check if content contains violence.
*
* @return true if violence detected
*/
boolean isViolence();
/**
* Check if content contains dangerous and criminal content.
*
* @return true if dangerous/criminal content detected
*/
boolean isDangerousAndCriminalContent();
/**
* Check if content relates to health.
*
* @return true if health content detected
*/
boolean isHealth();
/**
* Check if content relates to financial matters.
*
* @return true if financial content detected
*/
boolean isFinancial();
/**
* Check if content relates to law.
*
* @return true if law content detected
*/
boolean isLaw();
/**
* Check if content contains personally identifiable information.
*
* @return true if PII detected
*/
boolean isPii();
/**
* Create a new categories builder.
*
* @return a new builder
*/
static Builder builder();
/**
* Builder for constructing Categories instances.
*/
public static final class Builder {
Builder sexual(boolean sexual);
Builder hate(boolean hate);
Builder harassment(boolean harassment);
Builder selfHarm(boolean selfHarm);
Builder sexualMinors(boolean sexualMinors);
Builder hateThreatening(boolean hateThreatening);
Builder violenceGraphic(boolean violenceGraphic);
Builder selfHarmIntent(boolean selfHarmIntent);
Builder selfHarmInstructions(boolean selfHarmInstructions);
Builder harassmentThreatening(boolean harassmentThreatening);
Builder violence(boolean violence);
Builder dangerousAndCriminalContent(boolean dangerousAndCriminalContent);
Builder health(boolean health);
Builder financial(boolean financial);
Builder law(boolean law);
Builder pii(boolean pii);
Categories build();
}
}

Confidence scores for each moderation category (0.0 to 1.0).
public final class CategoryScores {
/**
* Get sexual content score.
*
* @return score from 0.0 (low confidence) to 1.0 (high confidence)
*/
double getSexual();
/**
* Get hate speech score.
*
* @return score from 0.0 to 1.0
*/
double getHate();
/**
* Get harassment score.
*
* @return score from 0.0 to 1.0
*/
double getHarassment();
/**
* Get self-harm score.
*
* @return score from 0.0 to 1.0
*/
double getSelfHarm();
/**
* Get sexual/minors content score.
*
* @return score from 0.0 to 1.0
*/
double getSexualMinors();
/**
* Get hate/threatening score.
*
* @return score from 0.0 to 1.0
*/
double getHateThreatening();
/**
* Get graphic violence score.
*
* @return score from 0.0 to 1.0
*/
double getViolenceGraphic();
/**
* Get self-harm intent score.
*
* @return score from 0.0 to 1.0
*/
double getSelfHarmIntent();
/**
* Get self-harm instructions score.
*
* @return score from 0.0 to 1.0
*/
double getSelfHarmInstructions();
/**
* Get harassment/threatening score.
*
* @return score from 0.0 to 1.0
*/
double getHarassmentThreatening();
/**
* Get violence score.
*
* @return score from 0.0 to 1.0
*/
double getViolence();
/**
* Get dangerous and criminal content score.
*
* @return score from 0.0 to 1.0
*/
double getDangerousAndCriminalContent();
/**
* Get health score.
*
* @return score from 0.0 to 1.0
*/
double getHealth();
/**
* Get financial score.
*
* @return score from 0.0 to 1.0
*/
double getFinancial();
/**
* Get law score.
*
* @return score from 0.0 to 1.0
*/
double getLaw();
/**
* Get PII score.
*
* @return score from 0.0 to 1.0
*/
double getPii();
/**
* Create a new category scores builder.
*
* @return a new builder
*/
static Builder builder();
/**
* Builder for constructing CategoryScores instances.
*/
public static final class Builder {
Builder sexual(double sexual);
Builder hate(double hate);
Builder harassment(double harassment);
Builder selfHarm(double selfHarm);
Builder sexualMinors(double sexualMinors);
Builder hateThreatening(double hateThreatening);
Builder violenceGraphic(double violenceGraphic);
Builder selfHarmIntent(double selfHarmIntent);
Builder selfHarmInstructions(double selfHarmInstructions);
Builder harassmentThreatening(double harassmentThreatening);
Builder violence(double violence);
Builder dangerousAndCriminalContent(double dangerousAndCriminalContent);
Builder health(double health);
Builder financial(double financial);
Builder law(double law);
Builder pii(double pii);
CategoryScores build();
}
}
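
A sketch of assembling a Moderation by hand with the builders above, as you might when implementing a custom ModerationModel or writing tests; all values shown are illustrative:

Moderation moderation = Moderation.builder()
    .id("mod-123")                        // illustrative ID
    .model("example-moderation-model")    // illustrative model name
    .results(List.of(ModerationResult.builder()
        .flagged(true)
        .categories(Categories.builder().hate(true).build())
        .categoryScores(CategoryScores.builder().hate(0.92).build())
        .build()))
    .build();

Message containing text to moderate.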
public class ModerationMessage {
/**
* Construct a ModerationMessage with text.
*
* @param text the text to moderate
*/
public ModerationMessage(String text);
/**
* Get the text content.
*
* @return the text
*/
String getText();
}

Options for configuring moderation.
public interface ModerationOptions extends ModelOptions {
/**
* Get the model name to use for moderation.
*
* @return the model name
*/
String getModel();
}
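
A minimal sketch of supplying options, assuming a simple record-based implementation of the interface; the record and the model name are illustrative, not part of the framework (provider modules ship their own ModerationOptions implementations):

// Hypothetical options carrier for illustration only
record SimpleModerationOptions(String model) implements ModerationOptions {
    @Override
    public String getModel() {
        return model;
    }
}

ModerationPrompt prompt = new ModerationPrompt(
    new ModerationMessage("Text to screen"),
    new SimpleModerationOptions("example-moderation-model"));

Metadata for moderation generation.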
public interface ModerationGenerationMetadata extends ResultMetadata {
// Generation-specific metadata
}

Metadata for moderation responses.
public class ModerationResponseMetadata extends AbstractResponseMetadata {
// Response-level metadata
}

Basic safety check in a Spring service:

import org.springframework.ai.moderation.Moderation;
import org.springframework.ai.moderation.ModerationModel;
import org.springframework.ai.moderation.ModerationPrompt;
import org.springframework.ai.moderation.ModerationResponse;
import org.springframework.ai.moderation.ModerationResult;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
@Service
public class ModerationService {
@Autowired
private ModerationModel moderationModel;
public boolean isContentSafe(String content) {
ModerationPrompt prompt = new ModerationPrompt(content);
ModerationResponse response = moderationModel.call(prompt);
Moderation moderation = response.getResult().getOutput();
// Safe only when no individual result was flagged
return moderation.getResults().stream().noneMatch(ModerationResult::isFlagged);
}
}ModerationPrompt prompt = new ModerationPrompt("Content to check");
ModerationResponse response = moderationModel.call(prompt);
ModerationResult result = response.getResult().getOutput().getResults().get(0);
// Check if flagged
if (result.isFlagged()) {
System.out.println("Content flagged!");
// Check specific categories
Categories categories = result.getCategories();
if (categories.isHate()) {
System.out.println("Contains hate speech");
}
if (categories.isViolence()) {
System.out.println("Contains violence");
}
if (categories.isSexual()) {
System.out.println("Contains sexual content");
}
if (categories.isSelfHarm()) {
System.out.println("Contains self-harm content");
}
if (categories.isHarassment()) {
System.out.println("Contains harassment");
}
}ModerationPrompt prompt = new ModerationPrompt("Content to analyze");
ModerationResponse response = moderationModel.call(prompt);
ModerationResult result = response.getResult().getOutput().getResults().get(0);
// Get confidence scores
CategoryScores scores = result.getCategoryScores();
System.out.println("Moderation Scores:");
System.out.println("Hate: " + scores.getHate());
System.out.println("Violence: " + scores.getViolence());
System.out.println("Sexual: " + scores.getSexual());
System.out.println("Self-harm: " + scores.getSelfHarm());
System.out.println("Harassment: " + scores.getHarassment());
// Use custom threshold
double threshold = 0.5;
if (scores.getHate() > threshold) {
System.out.println("High confidence hate speech detected");
}

Applying custom risk thresholds:

@Service
public class CustomModerationService {
private final ModerationModel moderationModel;
private static final double HIGH_RISK_THRESHOLD = 0.8;
private static final double MEDIUM_RISK_THRESHOLD = 0.5;
public CustomModerationService(ModerationModel moderationModel) {
this.moderationModel = moderationModel;
}
public ModerationDecision moderateContent(String content) {
ModerationPrompt prompt = new ModerationPrompt(content);
ModerationResponse response = moderationModel.call(prompt);
CategoryScores scores = response.getResult().getOutput().getResults().get(0).getCategoryScores();
// Determine risk level
double maxScore = getMaxScore(scores);
if (maxScore >= HIGH_RISK_THRESHOLD) {
return new ModerationDecision("BLOCKED", "High risk content");
} else if (maxScore >= MEDIUM_RISK_THRESHOLD) {
return new ModerationDecision("REVIEW", "Requires review");
} else {
return new ModerationDecision("ALLOWED", "Content is safe");
}
}
private double getMaxScore(CategoryScores scores) {
return Math.max(
Math.max(scores.getHate(), scores.getViolence()),
Math.max(scores.getSexual(),
Math.max(scores.getSelfHarm(), scores.getHarassment())
)
);
}
record ModerationDecision(String action, String reason) {}
}

Moderating a batch of texts (the record is renamed ModerationOutcome so it does not shadow the framework's ModerationResult):

public List<ModerationOutcome> moderateMultipleTexts(List<String> texts) {
List<ModerationOutcome> results = new ArrayList<>();
for (String text : texts) {
ModerationPrompt prompt = new ModerationPrompt(text);
ModerationResponse response = moderationModel.call(prompt);
ModerationResult result = response.getResult().getOutput().getResults().get(0);
results.add(new ModerationOutcome(
text,
result.isFlagged(),
result.getCategories()
));
}
return results;
}
record ModerationOutcome(String text, boolean flagged, Categories categories) {}

Gating chat messages before they are sent:

@Service
public class ChatModerationService {
private final ModerationModel moderationModel;
public ChatModerationService(ModerationModel moderationModel) {
this.moderationModel = moderationModel;
}
public boolean canSendMessage(String message) {
ModerationPrompt prompt = new ModerationPrompt(message);
ModerationResponse response = moderationModel.call(prompt);
ModerationResult result = response.getResult().getOutput().getResults().get(0);
// Block flagged content
if (result.isFlagged()) {
Categories categories = result.getCategories();
// Log which categories were flagged
logViolations(categories);
return false;
}
return true;
}
private void logViolations(Categories categories) {
List<String> violations = new ArrayList<>();
if (categories.isHate()) violations.add("hate");
if (categories.isViolence()) violations.add("violence");
if (categories.isSexual()) violations.add("sexual");
if (categories.isSelfHarm()) violations.add("self-harm");
if (categories.isHarassment()) violations.add("harassment");
System.out.println("Content blocked for: " + String.join(", ", violations));
}
}

Exposing moderation through a REST endpoint:

@RestController
@RequestMapping("/api/moderation")
public class ModerationController {
private final ModerationModel moderationModel;
public ModerationController(ModerationModel moderationModel) {
this.moderationModel = moderationModel;
}
@PostMapping("/check")
public ModerationCheckResult checkContent(@RequestBody ModerationCheckRequest request) {
ModerationPrompt prompt = new ModerationPrompt(request.text());
ModerationResponse response = moderationModel.call(prompt);
ModerationResult result = response.getResult().getOutput().getResults().get(0);
return new ModerationCheckResult(
result.isFlagged(),
convertCategories(result.getCategories()),
convertScores(result.getCategoryScores())
);
}
private Map<String, Boolean> convertCategories(Categories categories) {
return Map.of(
"hate", categories.isHate(),
"violence", categories.isViolence(),
"sexual", categories.isSexual(),
"selfHarm", categories.isSelfHarm(),
"harassment", categories.isHarassment()
);
}
private Map<String, Double> convertScores(CategoryScores scores) {
return Map.of(
"hate", scores.getHate(),
"violence", scores.getViolence(),
"sexual", scores.getSexual(),
"selfHarm", scores.getSelfHarm(),
"harassment", scores.getHarassment()
);
}
record ModerationCheckRequest(String text) {}
record ModerationCheckResult(
boolean flagged,
Map<String, Boolean> categories,
Map<String, Double> scores
) {}
}

Convenience checks for individual categories:

public class CategoryChecker {
private final ModerationModel moderationModel;
public CategoryChecker(ModerationModel moderationModel) {
this.moderationModel = moderationModel;
}
public boolean containsHateSpeech(String text) {
return moderate(text).getCategories().isHate();
}
public boolean containsViolence(String text) {
return moderate(text).getCategories().isViolence();
}
public boolean containsSexualContent(String text) {
return moderate(text).getCategories().isSexual();
}
public boolean containsSelfHarm(String text) {
return moderate(text).getCategories().isSelfHarm();
}
// Moderate the text and return the first individual result
private ModerationResult moderate(String text) {
ModerationPrompt prompt = new ModerationPrompt(text);
ModerationResponse response = moderationModel.call(prompt);
return response.getResult().getOutput().getResults().get(0);
}
}

Producing a human-readable moderation report:

public class ModerationReport {
private final ModerationModel moderationModel;
public ModerationReport(ModerationModel moderationModel) {
this.moderationModel = moderationModel;
}
public String generateReport(String content) {
ModerationPrompt prompt = new ModerationPrompt(content);
ModerationResponse response = moderationModel.call(prompt);
Moderation moderation = response.getResult().getOutput();
ModerationResult result = moderation.getResults().get(0);
StringBuilder report = new StringBuilder();
report.append("=== MODERATION REPORT ===\n");
report.append("Model: ").append(moderation.getModel()).append("\n");
report.append("Flagged: ").append(result.isFlagged()).append("\n\n");
report.append("Categories:\n");
Categories cats = result.getCategories();
report.append(" Hate: ").append(cats.isHate()).append("\n");
report.append(" Violence: ").append(cats.isViolence()).append("\n");
report.append(" Sexual: ").append(cats.isSexual()).append("\n");
report.append(" Self-harm: ").append(cats.isSelfHarm()).append("\n");
report.append(" Harassment: ").append(cats.isHarassment()).append("\n\n");
report.append("Scores:\n");
CategoryScores scores = result.getCategoryScores();
report.append(String.format(" Hate: %.3f\n", scores.getHate()));
report.append(String.format(" Violence: %.3f\n", scores.getViolence()));
report.append(String.format(" Sexual: %.3f\n", scores.getSexual()));
report.append(String.format(" Self-harm: %.3f\n", scores.getSelfHarm()));
report.append(String.format(" Harassment: %.3f\n", scores.getHarassment()));
return report.toString();
}
}

Handling moderation failures defensively (the record is named ModerationAttempt to avoid shadowing the framework's ModerationResult):

public ModerationAttempt safeModerate(String content) {
try {
ModerationPrompt prompt = new ModerationPrompt(content);
ModerationResponse response = moderationModel.call(prompt);
ModerationResult result = response.getResult().getOutput().getResults().get(0);
return new ModerationAttempt(
true,
result.isFlagged(),
result.getCategories()
);
} catch (Exception e) {
System.err.println("Moderation failed: " + e.getMessage());
// Default to safe/flagged on error
return new ModerationAttempt(false, true, null);
}
}
record ModerationAttempt(boolean success, boolean flagged, Categories categories) {}
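
Treating content as flagged when the moderation call itself fails, as above, fails closed: an outage in the moderation provider degrades to over-blocking rather than letting unchecked content through, which is usually the safer default for user-facing applications.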