Common classes used across Spring AI providing document processing, text transformation, embedding utilities, observability support, and tokenization capabilities for AI application development
Content formatting converts Document objects into AI-friendly text representations with configurable metadata inclusion.
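The content formatting layer consists of:
- ContentFormatter: base interface for converting documents to formatted strings.
- DefaultContentFormatter: configurable ContentFormatter implementation with template support and metadata filtering.
- ContentFormatTransformer: DocumentTransformer that applies a ContentFormatter to documents.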
These components control how document text and metadata are combined for AI operations like embeddings and inference.
Base interface for converting documents to formatted strings.
package org.springframework.ai.document;
interface ContentFormatter {
/**
* Format a document with specified metadata mode.
* @param document the document to format
* @param mode metadata inclusion mode (ALL, EMBED, INFERENCE, NONE)
* @return formatted string representation
*/
String format(Document document, MetadataMode mode);
}

import org.springframework.ai.document.Document;
import org.springframework.ai.document.ContentFormatter;
import org.springframework.ai.document.DefaultContentFormatter;
import org.springframework.ai.document.MetadataMode;
import java.util.Map;
// Create document with metadata
Document doc = Document.builder()
.text("AI enables intelligent applications")
.metadata("source", "documentation")
.metadata("category", "technical")
.metadata("timestamp", System.currentTimeMillis())
.build();
// Create formatter
ContentFormatter formatter = DefaultContentFormatter.defaultConfig();
// Format with different modes
String withAll = formatter.format(doc, MetadataMode.ALL);
String forEmbedding = formatter.format(doc, MetadataMode.EMBED);
String forInference = formatter.format(doc, MetadataMode.INFERENCE);
String contentOnly = formatter.format(doc, MetadataMode.NONE);
System.out.println("ALL: " + withAll);
System.out.println("EMBED: " + forEmbedding);
System.out.println("INFERENCE: " + forInference);
System.out.println("NONE: " + contentOnly);

Configurable ContentFormatter implementation with template support and metadata filtering.
package org.springframework.ai.document;
import java.util.List;
class DefaultContentFormatter implements ContentFormatter {
/**
* Create builder for custom configuration.
* @return builder instance
*/
static Builder builder();
/**
* Get default configuration instance.
* Uses default templates and no metadata exclusions.
* @return default formatter
*/
static DefaultContentFormatter defaultConfig();
/**
* Format document according to metadata mode.
* @param document document to format
* @param metadataMode metadata inclusion mode
* @return formatted string
*/
String format(Document document, MetadataMode metadataMode);
/**
* Get metadata template.
* Template for formatting individual metadata entries.
* Default: "%s: %s"
* @return metadata template string
*/
String getMetadataTemplate();
/**
* Get metadata separator.
* Separator between metadata entries.
* Default: "\n"
* @return metadata separator string
*/
String getMetadataSeparator();
/**
* Get text template.
* Template for combining metadata and content.
* Default: "%s\n\n%s"
* @return text template string
*/
String getTextTemplate();
/**
* Get excluded inference metadata keys.
* Metadata keys excluded in INFERENCE mode.
* @return list of excluded keys
*/
List<String> getExcludedInferenceMetadataKeys();
/**
* Get excluded embed metadata keys.
* Metadata keys excluded in EMBED mode.
* @return list of excluded keys
*/
List<String> getExcludedEmbedMetadataKeys();
}

class DefaultContentFormatter.Builder {
/**
* Initialize from existing formatter.
* @param contentFormatter formatter to copy settings from
* @return this builder
*/
Builder from(DefaultContentFormatter contentFormatter);
/**
* Set metadata template.
* Template for formatting individual metadata entries.
* Use %s placeholders for key and value.
* @param template template string (e.g., "%s: %s")
* @return this builder
*/
Builder withMetadataTemplate(String template);
/**
* Set metadata separator.
* Separator between metadata entries.
* @param separator separator string (e.g., "\n", ", ")
* @return this builder
*/
Builder withMetadataSeparator(String separator);
/**
* Set text template.
* Template for combining metadata and content.
* First %s is metadata, second %s is content.
* @param template template string (e.g., "%s\n\n%s")
* @return this builder
*/
Builder withTextTemplate(String template);
/**
* Set excluded inference metadata keys (varargs).
* @param keys keys to exclude in INFERENCE mode
* @return this builder
*/
Builder withExcludedInferenceMetadataKeys(String... keys);
/**
* Set excluded inference metadata keys (list).
* @param keys keys to exclude in INFERENCE mode
* @return this builder
*/
Builder withExcludedInferenceMetadataKeys(List<String> keys);
/**
* Set excluded embed metadata keys (varargs).
* @param keys keys to exclude in EMBED mode
* @return this builder
*/
Builder withExcludedEmbedMetadataKeys(String... keys);
/**
* Set excluded embed metadata keys (list).
* @param keys keys to exclude in EMBED mode
* @return this builder
*/
Builder withExcludedEmbedMetadataKeys(List<String> keys);
/**
* Build the formatter.
* @return configured DefaultContentFormatter
*/
DefaultContentFormatter build();
}

import org.springframework.ai.document.Document;
import org.springframework.ai.document.DefaultContentFormatter;
import org.springframework.ai.document.MetadataMode;
import java.util.List;
import java.util.Map;
// Use default configuration
DefaultContentFormatter defaultFormatter = DefaultContentFormatter.defaultConfig();
Document doc = Document.builder()
.text("Spring AI simplifies AI integration")
.metadata("author", "Spring Team")
.metadata("version", "1.0")
.metadata("internal_id", "12345")
.build();
String formatted = defaultFormatter.format(doc, MetadataMode.ALL);
// Output:
// author: Spring Team
// version: 1.0
// internal_id: 12345
//
// Spring AI simplifies AI integration
// Custom template formatting
DefaultContentFormatter customFormatter = DefaultContentFormatter.builder()
.withMetadataTemplate("[%s]=%s") // Custom metadata format
.withMetadataSeparator(", ") // Comma separator instead of newline
.withTextTemplate("Metadata: %s\nContent: %s") // Custom text template
.build();
String customFormatted = customFormatter.format(doc, MetadataMode.ALL);
// Output:
// Metadata: [author]=Spring Team, [version]=1.0, [internal_id]=12345
// Content: Spring AI simplifies AI integration
// Exclude sensitive metadata for embeddings
DefaultContentFormatter embedFormatter = DefaultContentFormatter.builder()
.withExcludedEmbedMetadataKeys("internal_id", "timestamp", "user_id")
.build();
String forEmbedding = embedFormatter.format(doc, MetadataMode.EMBED);
// Excludes internal_id, timestamp, user_id from embedding
// Exclude technical metadata for inference
DefaultContentFormatter inferenceFormatter = DefaultContentFormatter.builder()
.withExcludedInferenceMetadataKeys("embedding_vector", "chunk_index")
.build();
String forInference = inferenceFormatter.format(doc, MetadataMode.INFERENCE);
// Excludes embedding_vector, chunk_index from inference
// Combine multiple exclusions
DefaultContentFormatter advancedFormatter = DefaultContentFormatter.builder()
.withExcludedEmbedMetadataKeys(List.of("internal_id", "db_timestamp"))
.withExcludedInferenceMetadataKeys(List.of("embedding_vector", "similarity_score"))
.withMetadataTemplate("%s=%s")
.withMetadataSeparator(" | ")
.build();
// Copy and modify existing formatter
DefaultContentFormatter modifiedFormatter = DefaultContentFormatter.builder()
.from(defaultFormatter)
.withMetadataSeparator("; ")
.build();

The default configuration uses:
- Metadata template: "%s: %s" - Formats as "key: value" (uses String.format style)
- Metadata separator: "\n" - Each metadata entry on new line
- Text template: "%s\n\n%s" - Metadata, blank line, then content (uses String.format style)

Note: Templates use Java String.format() syntax with positional arguments (%s).
Custom templates with placeholder syntax like {text} or {metadata} are not supported.
Use the format specifiers (%s) to reference metadata and content in the proper order.
import org.springframework.ai.document.Document;
import org.springframework.ai.document.DefaultContentFormatter;
import org.springframework.ai.document.MetadataMode;
Document doc = Document.builder()
.text("Document content")
.metadata("source", "web")
.metadata("author", "Jane")
.metadata("embedding_model", "text-embedding-3")
.metadata("chunk_id", "chunk_5")
.build();
DefaultContentFormatter formatter = DefaultContentFormatter.builder()
.withExcludedEmbedMetadataKeys("chunk_id") // Don't embed chunk_id
.withExcludedInferenceMetadataKeys("embedding_model") // Don't send embedding_model to LLM
.build();
// MetadataMode.ALL - includes everything
String all = formatter.format(doc, MetadataMode.ALL);
// source, author, embedding_model, chunk_id all included
// MetadataMode.EMBED - excludes chunk_id
String embed = formatter.format(doc, MetadataMode.EMBED);
// source, author, embedding_model included
// chunk_id excluded
// MetadataMode.INFERENCE - excludes embedding_model
String inference = formatter.format(doc, MetadataMode.INFERENCE);
// source, author, chunk_id included
// embedding_model excluded
// MetadataMode.NONE - no metadata
String none = formatter.format(doc, MetadataMode.NONE);
// Only "Document content" returned

DocumentTransformer that applies a ContentFormatter to documents.
package org.springframework.ai.transformer;
import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentTransformer;
import org.springframework.ai.document.ContentFormatter;
import java.util.List;
class ContentFormatTransformer implements DocumentTransformer {
/**
* Create transformer with formatter.
* @param contentFormatter formatter to apply
*/
ContentFormatTransformer(ContentFormatter contentFormatter);
/**
* Create transformer with formatter and template rewrite control.
* @param contentFormatter formatter to apply
* @param disableTemplateRewrite if true, disable template rewriting
*/
ContentFormatTransformer(ContentFormatter contentFormatter, boolean disableTemplateRewrite);
/**
* Apply formatter to documents.
* @param documents documents to format
* @return formatted documents
*/
List<Document> apply(List<Document> documents);
}

import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentTransformer;
import org.springframework.ai.transformer.ContentFormatTransformer;
import org.springframework.ai.document.DefaultContentFormatter;
import org.springframework.ai.document.MetadataMode;
import java.util.List;
// Create documents
List<Document> docs = List.of(
Document.builder()
.text("First document")
.metadata("priority", "high")
.build(),
Document.builder()
.text("Second document")
.metadata("priority", "low")
.build()
);
// Create formatter
DefaultContentFormatter formatter = DefaultContentFormatter.builder()
.withMetadataTemplate("[%s] %s")
.build();
// Create transformer
DocumentTransformer transformer = new ContentFormatTransformer(formatter);
// Apply formatting
List<Document> formatted = transformer.apply(docs);
// Access formatted content
for (Document doc : formatted) {
// The formatter is now associated with each document
String content = doc.getFormattedContent(MetadataMode.ALL);
System.out.println(content);
}
// Disable template rewrite
DocumentTransformer noRewriteTransformer = new ContentFormatTransformer(
formatter,
true // disable template rewrite
);

import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentReader;
import org.springframework.ai.reader.TextReader;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.transformer.ContentFormatTransformer;
import org.springframework.ai.document.DefaultContentFormatter;
import org.springframework.core.io.ClassPathResource;
import java.util.List;
// Read documents
DocumentReader reader = new TextReader(new ClassPathResource("data.txt"));
List<Document> documents = reader.get();
// Create formatting transformer
ContentFormatTransformer formatter = new ContentFormatTransformer(
DefaultContentFormatter.builder()
.withExcludedEmbedMetadataKeys("internal_id")
.build()
);
// Create splitting transformer
TokenTextSplitter splitter = TokenTextSplitter.builder()
.withChunkSize(500)
.build();
// Chain transformers: split then format
List<Document> processed = formatter.apply(splitter.apply(documents));
// Or compose as single function
var pipeline = splitter.andThen(formatter);
List<Document> result = pipeline.apply(documents);

import org.springframework.ai.document.Document;
import org.springframework.ai.document.DefaultContentFormatter;
import org.springframework.ai.document.MetadataMode;
import org.springframework.ai.transformer.ContentFormatTransformer;
// Configure formatter for RAG retrieval
DefaultContentFormatter ragFormatter = DefaultContentFormatter.builder()
// Exclude technical metadata from embedding
.withExcludedEmbedMetadataKeys(
"document_id",
"chunk_index",
"timestamp",
"embedding_model"
)
// Exclude retrieval metadata from inference
.withExcludedInferenceMetadataKeys(
"similarity_score",
"embedding_vector",
"index_id"
)
// Format metadata clearly
.withMetadataTemplate("- %s: %s")
.withMetadataSeparator("\n")
.withTextTemplate("Document Metadata:\n%s\n\nContent:\n%s")
.build();
// Apply to documents before embedding
ContentFormatTransformer embedTransformer = new ContentFormatTransformer(ragFormatter);
List<Document> docsForEmbedding = embedTransformer.apply(documents);
// Get formatted content for embedding
for (Document doc : docsForEmbedding) {
String embedContent = doc.getFormattedContent(MetadataMode.EMBED);
// Send embedContent to embedding model
}
// Get formatted content for LLM inference
for (Document doc : retrievedDocs) {
String llmContent = doc.getFormattedContent(MetadataMode.INFERENCE);
// Include llmContent in prompt to LLM
}

import org.springframework.ai.document.Document;
import org.springframework.ai.document.DefaultContentFormatter;
import org.springframework.ai.document.MetadataMode;
// English formatter
DefaultContentFormatter englishFormatter = DefaultContentFormatter.builder()
.withMetadataTemplate("%s: %s")
.withTextTemplate("Metadata:\n%s\n\nContent:\n%s")
.build();
// Compact formatter for shorter context windows
DefaultContentFormatter compactFormatter = DefaultContentFormatter.builder()
.withMetadataTemplate("%s=%s")
.withMetadataSeparator("; ")
.withTextTemplate("[%s] %s")
.build();
Document doc = Document.builder()
.text("Important content")
.metadata("lang", "en")
.metadata("topic", "AI")
.build();
// Verbose format
String verbose = englishFormatter.format(doc, MetadataMode.ALL);
// Compact format (saves tokens)
String compact = compactFormatter.format(doc, MetadataMode.ALL);

import org.springframework.ai.document.Document;
import org.springframework.ai.document.DefaultContentFormatter;
import org.springframework.ai.document.MetadataMode;
// Formatter that excludes PII from external API calls
DefaultContentFormatter privacyFormatter = DefaultContentFormatter.builder()
.withExcludedEmbedMetadataKeys(
"user_email",
"user_id",
"ip_address",
"session_id"
)
.withExcludedInferenceMetadataKeys(
"user_email",
"user_id",
"ip_address",
"session_id"
)
.build();
Document userDoc = Document.builder()
.text("User query content")
.metadata("user_email", "user@example.com")
.metadata("session_id", "abc123")
.metadata("query_category", "support")
.build();
// Safe for embedding API - PII excluded
String safeForEmbedding = privacyFormatter.format(userDoc, MetadataMode.EMBED);
// Safe for LLM API - PII excluded
String safeForInference = privacyFormatter.format(userDoc, MetadataMode.INFERENCE);
// Full metadata for internal processing
String fullInternal = privacyFormatter.format(userDoc, MetadataMode.ALL);

Thread Safety:
- DefaultContentFormatter is immutable and thread-safe after construction
- ContentFormatTransformer is stateless and thread-safe

Performance:

Common Exceptions:
- NullPointerException: If document or metadataMode is null
- IllegalArgumentException: If template format strings are invalid (wrong number of %s placeholders)

Edge Cases:
// Empty metadata - formats with text only
Document doc = new Document("text");
String formatted = formatter.format(doc, MetadataMode.ALL); // Just text, no metadata section
// Null metadata values - converted to string "null"
Document doc = Document.builder()
.text("content")
.metadata("key", null)
.build();
String formatted = formatter.format(doc, MetadataMode.ALL); // "key: null"
// Empty text - rejected when the document is built
Document doc = Document.builder()
.text("")
.build();
// Will throw IllegalArgumentException - text cannot be empty

Install with Tessl CLI
npx tessl i tessl/maven-org-springframework-ai--spring-ai-commons@1.1.0