CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-commons

Common classes used across Spring AI providing document processing, text transformation, embedding utilities, observability support, and tokenization capabilities for AI application development

Overview
Eval results
Files

docs/reference/document-model.md

Document Model

The document model provides the core abstractions for representing content (text and media) with metadata in Spring AI applications.

Overview

The document model consists of:

  • Content - Base interface for all content types
  • MediaContent - Interface for content with media attachments
  • Document - Main container for text or media content with metadata
  • Media - Represents media files (images, videos, documents) with MIME types
  • MetadataMode - Controls metadata inclusion when formatting content

Capabilities

Content Interface

Base interface for all content types.

package org.springframework.ai.content;

interface Content {
    /**
     * Get the text content.
     * @return the content text
     */
    String getText();

    /**
     * Get the metadata associated with the content.
     * @return metadata map
     */
    Map<String, Object> getMetadata();
}

MediaContent Interface

Interface for content that includes media attachments.

package org.springframework.ai.content;

interface MediaContent extends Content {
    /**
     * Get the list of media attachments.
     * @return list of Media objects
     */
    List<Media> getMedia();
}

Document Class

Main container for document content (either text OR media, not both) with metadata, unique ID, and optional relevance score.

package org.springframework.ai.document;

class Document implements Content {
    // Static field
    static final ContentFormatter DEFAULT_CONTENT_FORMATTER;

    // Constructors
    Document(String content);
    Document(String text, Map<String, Object> metadata);
    Document(String id, String text, Map<String, Object> metadata);
    Document(Media media, Map<String, Object> metadata);
    Document(String id, Media media, Map<String, Object> metadata);

    // Builder
    static Builder builder();

    // Core methods
    String getId();
    String getText();
    boolean isText();
    Media getMedia();
    Map<String, Object> getMetadata();
    Double getScore();

    // Formatting methods
    String getFormattedContent();
    String getFormattedContent(MetadataMode metadataMode);
    String getFormattedContent(ContentFormatter formatter, MetadataMode metadataMode);

    // Content formatter management
    ContentFormatter getContentFormatter();
    void setContentFormatter(ContentFormatter contentFormatter);

    // Builder from current state
    Builder mutate();
}

Document.Builder

class Document.Builder {
    Builder idGenerator(IdGenerator idGenerator);
    Builder id(String id);
    Builder text(String text);
    Builder media(Media media);
    Builder metadata(Map<String, Object> metadata);
    Builder metadata(String key, Object value);
    Builder score(Double score);
    Document build();
}

Usage Examples

import org.springframework.ai.content.Media;
import org.springframework.ai.document.Document;
import org.springframework.ai.document.MetadataMode;
import org.springframework.core.io.ClassPathResource;
import java.util.Map;

// Create simple text document
Document doc1 = new Document("This is the content");

// Create document with metadata
Document doc2 = new Document(
    "Document content",
    Map.of(
        "source", "user-input",
        "category", "research",
        "timestamp", System.currentTimeMillis()
    )
);

// Create document with builder
Document doc3 = Document.builder()
    .id("custom-id-123")
    .text("Content with custom ID")
    .metadata("author", "John Doe")
    .metadata("version", 1)
    .score(0.95)
    .build();

// Create media document
Media image = Media.builder()
    .mimeType(Media.Format.IMAGE_PNG)
    .data(new ClassPathResource("image.png"))
    .name("product-image")
    .build();

Document mediaDoc = Document.builder()
    .media(image)
    .metadata("product_id", "12345")
    .build();

// Get formatted content (includes metadata based on mode)
String formattedAll = doc3.getFormattedContent(MetadataMode.ALL);
String formattedInference = doc3.getFormattedContent(MetadataMode.INFERENCE);
String formattedEmbed = doc3.getFormattedContent(MetadataMode.EMBED);
String formattedNone = doc3.getFormattedContent(MetadataMode.NONE);

// Check document type
if (doc1.isText()) {
    String text = doc1.getText();
    // Process text
}

if (mediaDoc.getMedia() != null) {
    byte[] mediaData = mediaDoc.getMedia().getDataAsByteArray();
    // Process media
}

// Create modified version using mutate
Document modified = doc3.mutate()
    .score(0.98)
    .metadata("reviewed", true)
    .build();
// Note: mutate() creates a builder for modifying the document.
// Metadata may be shared between the original and mutated documents.
// For complete isolation, create a new document with new metadata.

Media Class

Represents media data (images, videos, documents) with MIME type, data, and metadata.

package org.springframework.ai.content;

import org.springframework.util.MimeType;
import org.springframework.core.io.Resource;
import java.net.URI;

class Media {
    // Constructors
    Media(MimeType mimeType, URI uri);
    Media(MimeType mimeType, Resource resource);

    // Builder
    static Builder builder();

    // Methods
    MimeType getMimeType();
    Object getData();
    byte[] getDataAsByteArray();
    String getId();
    String getName();
}

Media.Builder

class Media.Builder {
    Builder mimeType(MimeType mimeType);
    Builder data(Resource resource);
    Builder data(Object data);
    Builder data(URI uri);
    Builder id(String id);
    Builder name(String name);
    Media build();
}

Media.Format Constants

MIME type constants for common media formats.

class Media.Format {
    // Document formats
    static final MimeType DOC_PDF;
    static final MimeType DOC_CSV;
    static final MimeType DOC_DOC;
    static final MimeType DOC_DOCX;
    static final MimeType DOC_XLS;
    static final MimeType DOC_XLSX;
    static final MimeType DOC_HTML;
    static final MimeType DOC_TXT;
    static final MimeType DOC_MD;

    // Video formats
    static final MimeType VIDEO_MKV;
    static final MimeType VIDEO_MOV;
    static final MimeType VIDEO_MP4;
    static final MimeType VIDEO_WEBM;
    static final MimeType VIDEO_FLV;
    static final MimeType VIDEO_MPEG;
    static final MimeType VIDEO_MPG;
    static final MimeType VIDEO_WMV;
    static final MimeType VIDEO_THREE_GP;

    // Image formats
    static final MimeType IMAGE_PNG;
    static final MimeType IMAGE_JPEG;
    static final MimeType IMAGE_GIF;
    static final MimeType IMAGE_WEBP;
}

Usage Examples

import org.springframework.ai.content.Media;
import org.springframework.ai.document.Document;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.FileSystemResource;
import org.springframework.util.MimeType;
import java.net.URI;

// Create media from URI
Media media1 = new Media(
    Media.Format.IMAGE_PNG,
    URI.create("https://example.com/image.png")
);

// Create media from resource
Media media2 = new Media(
    Media.Format.DOC_PDF,
    new ClassPathResource("document.pdf")
);

// Create media with builder
Media media3 = Media.builder()
    .mimeType(Media.Format.VIDEO_MP4)
    .data(new FileSystemResource("/path/to/video.mp4"))
    .id("video-001")
    .name("tutorial-video")
    .build();

// Create image media for AI processing
Media productImage = Media.builder()
    .mimeType(Media.Format.IMAGE_JPEG)
    .data(new ClassPathResource("products/item-123.jpg"))
    .name("product-image")
    .build();

// Access media data
MimeType type = media3.getMimeType();
byte[] data = media3.getDataAsByteArray();
String mediaName = media3.getName();

// Use media in document
Document docWithMedia = Document.builder()
    .media(productImage)
    .metadata("product_id", "123")
    .metadata("category", "electronics")
    .build();

Important Notes

  • Security Warning: The Media name field is a potential prompt injection vector. Only set the name from trusted sources.
  • Media data can be provided as URI, Resource, byte array, or any Object
  • The getData() method returns the raw data (URI string or byte array)
  • Use getDataAsByteArray() to always get byte array representation

MetadataMode Enum

Controls which metadata is included when formatting document content for AI processing.

package org.springframework.ai.document;

enum MetadataMode {
    /**
     * Include all metadata.
     */
    ALL,

    /**
     * Include only metadata relevant for embedding operations.
     */
    EMBED,

    /**
     * Include only metadata relevant for inference/generation operations.
     */
    INFERENCE,

    /**
     * Exclude all metadata.
     */
    NONE
}

Usage

import org.springframework.ai.document.Document;
import org.springframework.ai.document.MetadataMode;

Document doc = Document.builder()
    .text("Document content")
    .metadata("source", "web")
    .metadata("author", "Jane")
    .metadata("embedding_info", "technical")
    .build();

// Different metadata modes
String withAll = doc.getFormattedContent(MetadataMode.ALL);
String forEmbedding = doc.getFormattedContent(MetadataMode.EMBED);
String forInference = doc.getFormattedContent(MetadataMode.INFERENCE);
String contentOnly = doc.getFormattedContent(MetadataMode.NONE);

The specific metadata keys included in EMBED vs INFERENCE modes are controlled by the ContentFormatter (see Content Formatting documentation).

DocumentMetadata Enum

Common metadata keys used by DocumentReaders and other components.

package org.springframework.ai.document;

enum DocumentMetadata {
    /**
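     * Metadata key for the distance between the document embedding and the query vector.
     * A lower distance means a closer match; it is the opposite of a similarity score.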
     */
    DISTANCE("distance");

    String value();
}

Usage

import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentMetadata;

// Access the distance value (lower distance = more similar)
Document doc = // ... retrieved from vector store
Double distance = (Double) doc.getMetadata().get(DocumentMetadata.DISTANCE.value());

// Set distance metadata
Document docWithDistance = Document.builder()
    .text("Content")
    .metadata(DocumentMetadata.DISTANCE.value(), 0.123)
    .build();

ID Generation

Documents automatically receive unique IDs. You can customize ID generation or provide explicit IDs.

import org.springframework.ai.document.Document;
import org.springframework.ai.document.id.IdGenerator;
import org.springframework.ai.document.id.RandomIdGenerator;
import org.springframework.ai.document.id.JdkSha256HexIdGenerator;

// Default: random UUID
Document doc1 = new Document("Content"); // Gets random UUID

// Explicit ID
Document doc2 = Document.builder()
    .id("my-custom-id")
    .text("Content")
    .build();

// Custom ID generator (random)
Document doc3 = Document.builder()
    .idGenerator(new RandomIdGenerator())
    .text("Content")
    .build();

// Content-based ID (SHA-256 hash-derived UUID)
IdGenerator hashGen = new JdkSha256HexIdGenerator();
Document doc4 = Document.builder()
    .idGenerator(hashGen)
    .text("Content")
    .build();
// The same content will always generate the same UUID (36 characters)
// Note: JdkSha256HexIdGenerator returns a name-based (version 3) UUID (36 characters
// with hyphens) derived from the SHA-256 hash of the content, NOT the raw hexadecimal hash.
// Example (illustrative format only): "550e8400-e29b-31d4-a716-446655440000"

See Document Processing documentation for details on IdGenerator interface and implementations.

Install with Tessl CLI

npx tessl i tessl/maven-org-springframework-ai--spring-ai-commons

docs

index.md

README.md

tile.json