Common classes used across Spring AI, providing document processing, text transformation, embedding utilities, observability support, and tokenization capabilities for AI application development.
The document model provides the core abstractions for representing content (text and media) with metadata in Spring AI applications.
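The document model consists of:
- Content: base interface for all content types.
- MediaContent: interface for content that includes media attachments.
- Document: main container for document content with metadata, a unique ID, and an optional relevance score.
- Media: media data (images, videos, documents) with MIME type, data, and metadata.
- MetadataMode: controls which metadata is included when formatting document content.
- DocumentMetadata: common metadata keys used by DocumentReaders and other components.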
Base interface for all content types.
package org.springframework.ai.content;
interface Content {
/**
* Get the text content.
* @return the content text
*/
String getText();
/**
* Get the metadata associated with the content.
* @return metadata map
*/
Map<String, Object> getMetadata();
}

Interface for content that includes media attachments.
package org.springframework.ai.content;
interface MediaContent extends Content {
/**
* Get the list of media attachments.
* @return list of Media objects
*/
List<Media> getMedia();
}

Main container for document content (either text OR media, not both) with metadata, a unique ID, and an optional relevance score.
package org.springframework.ai.document;
class Document implements Content {
// Static field
static final ContentFormatter DEFAULT_CONTENT_FORMATTER;
// Constructors
Document(String content);
Document(String text, Map<String, Object> metadata);
Document(String id, String text, Map<String, Object> metadata);
Document(Media media, Map<String, Object> metadata);
Document(String id, Media media, Map<String, Object> metadata);
// Builder
static Builder builder();
// Core methods
String getId();
String getText();
boolean isText();
Media getMedia();
Map<String, Object> getMetadata();
Double getScore();
// Formatting methods
String getFormattedContent();
String getFormattedContent(MetadataMode metadataMode);
String getFormattedContent(ContentFormatter formatter, MetadataMode metadataMode);
// Content formatter management
ContentFormatter getContentFormatter();
void setContentFormatter(ContentFormatter contentFormatter);
// Builder from current state
Builder mutate();
}

class Document.Builder {
Builder idGenerator(IdGenerator idGenerator);
Builder id(String id);
Builder text(String text);
Builder media(Media media);
Builder metadata(Map<String, Object> metadata);
Builder metadata(String key, Object value);
Builder score(Double score);
Document build();
}

import org.springframework.ai.document.Document;
import org.springframework.ai.document.MetadataMode;
import java.util.Map;
// Create simple text document
Document doc1 = new Document("This is the content");
// Create document with metadata
Document doc2 = new Document(
"Document content",
Map.of(
"source", "user-input",
"category", "research",
"timestamp", System.currentTimeMillis()
)
);
// Create document with builder
Document doc3 = Document.builder()
.id("custom-id-123")
.text("Content with custom ID")
.metadata("author", "John Doe")
.metadata("version", 1)
.score(0.95)
.build();
// Create media document
Media image = Media.builder()
.mimeType(Media.Format.IMAGE_PNG)
.data(new ClassPathResource("image.png"))
.name("product-image")
.build();
Document mediaDoc = Document.builder()
.media(image)
.metadata("product_id", "12345")
.build();
// Get formatted content (includes metadata based on mode)
String formattedAll = doc3.getFormattedContent(MetadataMode.ALL);
String formattedInference = doc3.getFormattedContent(MetadataMode.INFERENCE);
String formattedEmbed = doc3.getFormattedContent(MetadataMode.EMBED);
String formattedNone = doc3.getFormattedContent(MetadataMode.NONE);
// Check document type
if (doc1.isText()) {
String text = doc1.getText();
// Process text
}
if (mediaDoc.getMedia() != null) {
byte[] mediaData = mediaDoc.getMedia().getDataAsByteArray();
// Process media
}
// Create modified version using mutate
Document modified = doc3.mutate()
.score(0.98)
.metadata("reviewed", true)
.build();
// Note: mutate() creates a builder for modifying the document.
// Metadata may be shared between the original and mutated documents.
// For complete isolation, create a new document with new metadata.

Represents media data (images, videos, documents) with MIME type, data, and metadata.
package org.springframework.ai.content;
import org.springframework.util.MimeType;
import org.springframework.core.io.Resource;
import java.net.URI;
class Media {
// Constructors
Media(MimeType mimeType, URI uri);
Media(MimeType mimeType, Resource resource);
// Builder
static Builder builder();
// Methods
MimeType getMimeType();
Object getData();
byte[] getDataAsByteArray();
String getId();
String getName();
}

class Media.Builder {
Builder mimeType(MimeType mimeType);
Builder data(Resource resource);
Builder data(Object data);
Builder data(URI uri);
Builder id(String id);
Builder name(String name);
Media build();
}

MIME type constants for common media formats.
class Media.Format {
// Document formats
static final MimeType DOC_PDF;
static final MimeType DOC_CSV;
static final MimeType DOC_DOC;
static final MimeType DOC_DOCX;
static final MimeType DOC_XLS;
static final MimeType DOC_XLSX;
static final MimeType DOC_HTML;
static final MimeType DOC_TXT;
static final MimeType DOC_MD;
// Video formats
static final MimeType VIDEO_MKV;
static final MimeType VIDEO_MOV;
static final MimeType VIDEO_MP4;
static final MimeType VIDEO_WEBM;
static final MimeType VIDEO_FLV;
static final MimeType VIDEO_MPEG;
static final MimeType VIDEO_MPG;
static final MimeType VIDEO_WMV;
static final MimeType VIDEO_THREE_GP;
// Image formats
static final MimeType IMAGE_PNG;
static final MimeType IMAGE_JPEG;
static final MimeType IMAGE_GIF;
static final MimeType IMAGE_WEBP;
}

import org.springframework.ai.content.Media;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.FileSystemResource;
import java.net.URI;
// Create media from URI
Media media1 = new Media(
Media.Format.IMAGE_PNG,
URI.create("https://example.com/image.png")
);
// Create media from resource
Media media2 = new Media(
Media.Format.DOC_PDF,
new ClassPathResource("document.pdf")
);
// Create media with builder
Media media3 = Media.builder()
.mimeType(Media.Format.VIDEO_MP4)
.data(new FileSystemResource("/path/to/video.mp4"))
.id("video-001")
.name("tutorial-video")
.build();
// Create image media for AI processing
Media productImage = Media.builder()
.mimeType(Media.Format.IMAGE_JPEG)
.data(new ClassPathResource("products/item-123.jpg"))
.name("product-image")
.build();
// Access media data
MimeType type = media3.getMimeType();
byte[] data = media3.getDataAsByteArray();
String mediaName = media3.getName();
// Use media in document
Document docWithMedia = Document.builder()
.media(productImage)
.metadata("product_id", "123")
.metadata("category", "electronics")
.build();

- The name field can be a potential prompt injection vector. Only set name from trusted sources.
- The getData() method returns the raw data (URI string or byte array).
- Use getDataAsByteArray() to always get a byte array representation.

Controls which metadata is included when formatting document content for AI processing.
package org.springframework.ai.document;
enum MetadataMode {
/**
* Include all metadata.
*/
ALL,
/**
* Include only metadata relevant for embedding operations.
*/
EMBED,
/**
* Include only metadata relevant for inference/generation operations.
*/
INFERENCE,
/**
* Exclude all metadata.
*/
NONE
}

import org.springframework.ai.document.Document;
import org.springframework.ai.document.MetadataMode;
Document doc = Document.builder()
.text("Document content")
.metadata("source", "web")
.metadata("author", "Jane")
.metadata("embedding_info", "technical")
.build();
// Different metadata modes
String withAll = doc.getFormattedContent(MetadataMode.ALL);
String forEmbedding = doc.getFormattedContent(MetadataMode.EMBED);
String forInference = doc.getFormattedContent(MetadataMode.INFERENCE);
String contentOnly = doc.getFormattedContent(MetadataMode.NONE);

The specific metadata keys included in EMBED vs INFERENCE modes are controlled by the ContentFormatter (see the Content Formatting documentation).
Common metadata keys used by DocumentReaders and other components.
package org.springframework.ai.document;
enum DocumentMetadata {
/**
* Distance/similarity score metadata key.
*/
DISTANCE("distance");
String value();
}

import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentMetadata;
// Access distance/similarity score
Document doc = // ... retrieved from vector store
Double distance = (Double) doc.getMetadata().get(DocumentMetadata.DISTANCE.value());
// Set distance metadata
Document docWithDistance = Document.builder()
.text("Content")
.metadata(DocumentMetadata.DISTANCE.value(), 0.123)
.build();

Documents automatically receive unique IDs. You can customize ID generation or provide explicit IDs.
import org.springframework.ai.document.Document;
import org.springframework.ai.document.id.IdGenerator;
import org.springframework.ai.document.id.RandomIdGenerator;
import org.springframework.ai.document.id.JdkSha256HexIdGenerator;
// Default: random UUID
Document doc1 = new Document("Content"); // Gets random UUID
// Explicit ID
Document doc2 = Document.builder()
.id("my-custom-id")
.text("Content")
.build();
// Custom ID generator (random)
Document doc3 = Document.builder()
.idGenerator(new RandomIdGenerator())
.text("Content")
.build();
// Content-based ID (SHA-256 hash-derived UUID)
IdGenerator hashGen = new JdkSha256HexIdGenerator();
Document doc4 = Document.builder()
.idGenerator(hashGen)
.text("Content")
.build();
// Same content will always generate same UUID (36 characters)
// Note: JdkSha256HexIdGenerator returns a UUID v5 (36 characters with hyphens)
// derived from the SHA-256 hash of the content, NOT the raw hexadecimal hash.
// Example: "550e8400-e29b-41d4-a716-446655440000"

See the Document Processing documentation for details on the IdGenerator interface and its implementations.
Install with Tessl CLI
npx tessl i tessl/maven-org-springframework-ai--spring-ai-commons