RAG (Retrieval-Augmented Generation) framework for the Embabel Agent platform providing content ingestion, chunking, hierarchical navigation, and semantic search capabilities
Core data structures for representing content, entities, relationships, and retrievable items in the RAG framework.
Foundation interfaces for all data objects in the system.
Base interface for all data objects with metadata support.
sealed interface Datum {
val id: String
val uri: String?
val metadata: Map<String, Any?>
fun propertiesToPersist(): Map<String, Any?>
fun labels(): Set<String>
}
Properties:
- id: Unique identifier for the datum
- uri: Optional URI reference for the datum
- metadata: Key-value metadata associated with the datum
Methods:
- propertiesToPersist(): Returns properties to be persisted to storage
- labels(): Returns set of type labels for the datum
Interface for objects that have embeddings.
interface Embedded {
val embedding: Embedding?
}
Properties:
- embedding: Optional vector embedding for semantic search
Interface for objects that can be embedded.
interface Embeddable {
fun embeddableValue(): String
}
Methods:
- embeddableValue(): Returns text representation to be embedded
Base interface for RAG-retrievable objects with stable IDs.
interface Retrievable : HasInfoString, Datum, Embeddable
Combines:
- HasInfoString: Can produce human-readable info strings
- Datum: Has ID, URI, and metadata
- Embeddable: Can be converted to an embedding
Base interface for all content elements.
interface ContentElement : Datum
Represents any element in a content hierarchy.
Content elements that exist within a hierarchy.
interface HierarchicalContentElement : ContentElement {
val parentId: String?
}
Properties:
- parentId: ID of the parent element (null for root elements)
Structures for representing hierarchical documents with sections and content.
Root of a structured document.
interface ContentRoot : HierarchicalContentElement {
override val uri: String // Required, non-null
val title: String
val ingestionTimestamp: Instant
}
Properties:
- uri: Required URI for the document (non-null)
- title: Document title
- ingestionTimestamp: When the document was ingested
Base interface for all sections in the hierarchy.
sealed interface Section : HierarchicalContentElement {
val title: String
}
Properties:
- title: Section title
Section with direct children navigation.
interface NavigableSection : Section {
val children: Iterable<NavigableSection>
}
Properties:
- children: Direct child sections
Section that contains child sections.
interface ContainerSection : Section
Container section with navigation methods for traversing the hierarchy.
interface NavigableContainerSection : ContainerSection, NavigableSection {
/**
* Get all descendant sections (recursive)
*/
fun descendants(): Iterable<NavigableSection>
/**
* Get all descendant leaf sections
*/
fun leaves(): Iterable<LeafSection>
}
Methods:
- descendants(): Returns all descendant sections recursively
- leaves(): Returns all leaf sections (terminal nodes)
Terminal section containing content without further subdivisions.
data class LeafSection(
val text: String,
val title: String,
val parentId: String?,
override val id: String,
override val uri: String?,
override val metadata: Map<String, Any?> = emptyMap()
) : NavigableSection, Retrievable, HasContent
Properties:
- text: Content text
- title: Section title
- parentId: Parent section ID
- id: Unique identifier
- uri: Optional URI
- metadata: Metadata map
In-memory representation of a container section.
data class DefaultMaterializedContainerSection(
val title: String,
val children: List<NavigableSection>,
override val id: String,
override val parentId: String?,
override val uri: String?,
override val metadata: Map<String, Any?> = emptyMap()
) : NavigableContainerSection
Properties:
- title: Section title
- children: List of child sections
- id: Unique identifier
- parentId: Parent section ID
- uri: Optional URI
- metadata: Metadata map
Navigable document root interface.
interface NavigableDocument : ContentRoot, NavigableContainerSection {
/**
* Create a copy with additional metadata
*/
fun withMetadata(additionalMetadata: Map<String, Any?>): NavigableDocument
}
Methods:
- withMetadata(): Returns new document with merged metadata
In-memory representation of a complete document.
data class MaterializedDocument(
val title: String,
val ingestionTimestamp: Instant,
val children: List<NavigableSection>,
override val id: String,
override val uri: String,
override val metadata: Map<String, Any?> = emptyMap()
) : NavigableDocument
Properties:
- title: Document title
- ingestionTimestamp: Ingestion timestamp
- children: Top-level sections
- id: Unique identifier
- uri: Document URI (required)
- metadata: Metadata map
Traditional RAG text chunks with metadata for indexing and retrieval.
Text chunk interface with support for transformation and metadata enrichment.
interface Chunk : Source, HierarchicalContentElement {
val text: String // Indexed text
val urtext: String // Raw text for citation
override val parentId: String // Non-null parent
val pathFromRoot: List<String>?
val uri: String?
/**
* Create a new chunk with transformed text
*/
fun withText(transformed: String): Chunk
/**
* Create a new chunk with additional metadata
*/
fun withAdditionalMetadata(metadata: Map<String, Any?>): Chunk
companion object {
/**
* Create a chunk
*/
operator fun invoke(
id: String,
text: String,
metadata: Map<String, Any?>,
parentId: String
): Chunk
@JvmStatic
fun create(
text: String,
parentId: String,
metadata: Map<String, Any?> = emptyMap(),
id: String = UUID.randomUUID().toString(),
urtext: String = text
): Chunk
}
}
Properties:
- text: Indexed text (may be transformed)
- urtext: Original raw text for citation
- parentId: Parent section ID (required)
- pathFromRoot: Path from document root to chunk
- uri: Optional URI
Methods:
- withText(): Returns new chunk with modified text
- withAdditionalMetadata(): Returns new chunk with merged metadata
Factory Methods:
- invoke(): Create chunk with specified properties
- create(): Create chunk with defaults (generates UUID)
Constants for standard chunk metadata from ContentChunker:
companion object {
const val CHUNK_INDEX = "chunk_index"
const val TOTAL_CHUNKS = "total_chunks"
const val SEQUENCE_NUMBER = "sequence_number"
const val ROOT_DOCUMENT_ID = "root_document_id"
const val CONTAINER_SECTION_ID = "container_section_id"
const val CONTAINER_SECTION_TITLE = "container_section_title"
const val CONTAINER_SECTION_URL = "container_section_url"
const val LEAF_SECTION_ID = "leaf_section_id"
const val LEAF_SECTION_TITLE = "leaf_section_title"
const val LEAF_SECTION_URL = "leaf_section_url"
}
Metadata Keys:
- CHUNK_INDEX: Index within parent section (0-based)
- TOTAL_CHUNKS: Total chunks from parent section
- SEQUENCE_NUMBER: Global sequence number across document
- ROOT_DOCUMENT_ID: ID of document root
- CONTAINER_SECTION_ID: ID of container section
- CONTAINER_SECTION_TITLE: Title of container section
- CONTAINER_SECTION_URL: URL of container section
- LEAF_SECTION_ID: ID of leaf section
- LEAF_SECTION_TITLE: Title of leaf section
- LEAF_SECTION_URL: URL of leaf section
Input data for RAG systems (chunks or facts).
Base interface for RAG input data.
sealed interface Source : Retrievable
Factual assertion with authority.
data class Fact(
val assertion: String,
val authority: String,
override val uri: String?,
override val metadata: Map<String, Any?>,
override val id: String
) : Source
Properties:
- assertion: The factual statement
- authority: Source of authority for the fact
- uri: Optional URI reference
- metadata: Associated metadata
- id: Unique identifier
Structured entities with properties and relationships.
Base contract for named entities.
interface NamedEntity : Retrievable, NamedAndDescribed {
override val id: String
override val name: String
override val description: String
val uri: String? get() = null
val metadata: Map<String, Any?> get() = emptyMap()
fun labels(): Set<String>
fun embeddableValue(): String
fun infoString(verbose: Boolean? = null, indent: Int = 0): String
}
Properties:
- id: Unique identifier
- name: Entity name
- description: Entity description
- uri: Optional URI (default null)
- metadata: Metadata map (default empty)
Methods:
- labels(): Returns type labels for entity
- embeddableValue(): Returns text for embedding
- infoString(): Returns formatted info string
Storage format for named entities with arbitrary properties.
interface NamedEntityData : NamedEntity {
val properties: Map<String, Any>
val linkedDomainType: DomainType?
/**
* Convert to typed instance using ObjectMapper
*/
fun <T : NamedEntity> toTypedInstance(objectMapper: ObjectMapper): T?
fun <T : NamedEntity> toTypedInstance(
objectMapper: ObjectMapper,
type: Class<T>
): T?
fun <T : NamedEntity> toTypedInstance(
objectMapper: ObjectMapper,
type: Class<T>,
navigator: RelationshipNavigator?
): T?
/**
* Create dynamic proxy instance implementing specified interfaces
*/
fun <T : NamedEntity> toInstance(
vararg interfaces: Class<out NamedEntity>
): T
fun <T : NamedEntity> toInstance(
navigator: RelationshipNavigator?,
vararg interfaces: Class<out NamedEntity>
): T
companion object {
val DEFAULT_EXCLUDED_PROPERTIES = setOf("embedding", "id")
const val ENTITY_LABEL = "__Entity__"
const val HAS_ENTITY = "HAS_ENTITY"
}
}
Properties:
- properties: Map of arbitrary properties
- linkedDomainType: Optional domain type reference
Methods:
- toTypedInstance(): Convert to typed entity class using ObjectMapper
- toInstance(): Create dynamic proxy implementing specified interfaces
Constants:
- DEFAULT_EXCLUDED_PROPERTIES: Properties not persisted by default
- ENTITY_LABEL: Standard label for entities
- HAS_ENTITY: Standard relationship name
Simple implementation of NamedEntityData.
data class SimpleNamedEntityData(
override val id: String,
override val name: String,
override val description: String,
override val properties: Map<String, Any> = emptyMap(),
override val linkedDomainType: DomainType? = null,
override val uri: String? = null,
override val metadata: Map<String, Any?> = emptyMap()
) : NamedEntityData
Properties:
- id: Unique identifier
- name: Entity name
- description: Entity description
- properties: Property map (default empty)
- linkedDomainType: Optional domain type
- uri: Optional URI
- metadata: Metadata map (default empty)
Annotations and interfaces for entity relationships.
Marks getter methods as navigating relationships.
@Target(AnnotationTarget.FUNCTION)
@Retention(AnnotationRetention.RUNTIME)
annotation class Relationship(
val name: String = "",
val direction: RelationshipDirection = RelationshipDirection.OUTGOING
)
Parameters:
- name: Relationship name (derived from method name if empty)
- direction: Direction of relationship traversal
Direction of relationships.
enum class RelationshipDirection {
OUTGOING,
INCOMING,
BOTH
}
Values:
- OUTGOING: Follow relationships from source to target
- INCOMING: Follow relationships from target to source
- BOTH: Follow relationships in both directions
Provides relationship navigation capabilities.
interface RelationshipNavigator {
fun findRelated(
source: RetrievableIdentifier,
relationshipName: String,
direction: RelationshipDirection
): List<NamedEntityData>
}
Methods:
- findRelated(): Find entities related to source by relationship
Parameters:
- source: Source entity identifier
- relationshipName: Name of relationship to follow
- direction: Direction to traverse
Returns: List of related entities
Top-level functions for working with relationships.
/**
* Derive relationship name from method name
* Converts method names like "getColleagues" to "COLLEAGUES"
* @param methodName Method name to convert
* @return Derived relationship name
*/
fun deriveRelationshipName(methodName: String): String
Parameters:
- methodName: Method name to convert
Returns: Derived relationship name (uppercase, without "get" prefix)
import com.embabel.agent.rag.model.*
import java.time.Instant
// Create a document structure
val leafSection = LeafSection(
text = "This is the content of the section.",
title = "Introduction",
parentId = "doc-1",
id = "section-1",
uri = "https://example.com/docs#intro"
)
val document = MaterializedDocument(
title = "User Guide",
ingestionTimestamp = Instant.now(),
children = listOf(leafSection),
id = "doc-1",
uri = "https://example.com/docs"
)
// Navigate the document
val leaves = document.leaves().toList()
val allSections = document.descendants().toList()
// Add metadata
val enrichedDoc = document.withMetadata(
mapOf("author" to "Alice", "version" to "1.0")
)
import com.embabel.agent.rag.model.Chunk
import com.embabel.agent.rag.ingestion.ContentChunker
// Create a chunk
val chunk = Chunk.create(
text = "This is the indexed text content.",
parentId = "section-1",
metadata = mapOf(
ContentChunker.CHUNK_INDEX to 0,
ContentChunker.TOTAL_CHUNKS to 5,
ContentChunker.ROOT_DOCUMENT_ID to "doc-1"
)
)
// Transform chunk text
val transformed = chunk.withText("TRANSFORMED: ${chunk.text}")
// Add metadata
val enriched = chunk.withAdditionalMetadata(
mapOf("sentiment" to "positive", "language" to "en")
)
// Access metadata
val chunkIndex = chunk.metadata[ContentChunker.CHUNK_INDEX] as? Int
val rootId = chunk.metadata[ContentChunker.ROOT_DOCUMENT_ID] as? String
import com.embabel.agent.rag.model.*
// Create a named entity
val person = SimpleNamedEntityData(
id = "person-123",
name = "Alice Smith",
description = "Senior software engineer",
properties = mapOf(
"role" to "engineer",
"team" to "platform",
"yearsExperience" to 8
)
)
// Convert to typed instance (requires ObjectMapper)
data class Person(
override val id: String,
override val name: String,
override val description: String,
val role: String,
val team: String,
val yearsExperience: Int
) : NamedEntity {
override fun labels() = setOf("Person")
override fun embeddableValue() = "$name: $description"
override fun infoString(verbose: Boolean?, indent: Int) = name
}
val typedPerson = person.toTypedInstance<Person>(objectMapper, Person::class.java)
// Create dynamic proxy for interfaces
interface Employee : NamedEntity {
@Relationship(name = "WORKS_WITH")
fun getColleagues(): List<NamedEntity>
}
val employee = person.toInstance<Employee>(
navigator,
Employee::class.java
)
import com.embabel.agent.rag.model.*
import com.embabel.agent.rag.service.*
// Define an entity interface with relationships
interface Project : NamedEntity {
@Relationship(name = "HAS_CONTRIBUTOR", direction = RelationshipDirection.INCOMING)
fun getContributors(): List<NamedEntity>
@Relationship(name = "DEPENDS_ON")
fun getDependencies(): List<NamedEntity>
}
// Navigate relationships using RelationshipNavigator.
// Placeholder: replace the TODO with a real RelationshipNavigator implementation.
// A bare trailing comment is not a valid initializer and would make the
// following declaration fail to parse.
val navigator: RelationshipNavigator = TODO("Provide a RelationshipNavigator implementation")
val contributors = navigator.findRelated(
source = RetrievableIdentifier("project-1", "Project"),
relationshipName = "HAS_CONTRIBUTOR",
direction = RelationshipDirection.INCOMING
)
val dependencies = navigator.findRelated(
source = RetrievableIdentifier("project-1", "Project"),
relationshipName = "DEPENDS_ON",
direction = RelationshipDirection.OUTGOING
)
// Derive relationship name from method
val relationshipName = deriveRelationshipName("getColleagues")
// Returns: "COLLEAGUES"
import com.embabel.agent.rag.model.Fact
// Create a fact
val fact = Fact(
id = "fact-456",
assertion = "The framework supports vector search.",
authority = "Official documentation",
uri = "https://docs.example.com/features",
metadata = mapOf(
"confidence" to 0.95,
"source" to "manual"
)
)
// Access properties
println("Assertion: ${fact.assertion}")
println("Authority: ${fact.authority}")
println("Embeddable: ${fact.embeddableValue()}")
import com.embabel.agent.rag.model.*
import java.time.Instant
// Build a complex document structure:
//
//   doc-1
//   ├── section-1      (leaf)
//   └── container-1    (container)
//       ├── section-2   (leaf)
//       └── section-2-1 (leaf)
//
// Each element's parentId is the id of the element whose `children` list holds it.

// Leaf placed directly under the document root.
val section1 = LeafSection(
    text = "Introduction content",
    title = "Introduction",
    parentId = "doc-1",
    id = "section-1",
    uri = "https://example.com/docs#intro"
)

// Leaf placed inside the "Setup" container, so its parent is the container, not the root.
val section2 = LeafSection(
    text = "Getting started content",
    title = "Getting Started",
    parentId = "container-1",
    id = "section-2",
    uri = "https://example.com/docs#getting-started"
)

// Also a direct child of the "Setup" container. A LeafSection is terminal and
// cannot own children, so this section cannot be parented by section-2.
val subsection = LeafSection(
    text = "Installation instructions",
    title = "Installation",
    parentId = "container-1",
    id = "section-2-1",
    uri = "https://example.com/docs#installation"
)

// Container grouping the two setup-related leaves under the document root.
val containerSection = DefaultMaterializedContainerSection(
    title = "Setup",
    children = listOf(section2, subsection),
    id = "container-1",
    parentId = "doc-1",
    uri = "https://example.com/docs#setup"
)

// Document root holding one leaf and one container as top-level sections.
val document = MaterializedDocument(
    title = "Complete Guide",
    ingestionTimestamp = Instant.now(),
    children = listOf(section1, containerSection),
    id = "doc-1",
    uri = "https://example.com/docs",
    metadata = mapOf(
        "author" to "Documentation Team",
        "version" to "2.0",
        "language" to "en"
    )
)

// Navigate hierarchy
val allLeaves = document.leaves().toList()
println("Total leaf sections: ${allLeaves.size}")
val allDescendants = document.descendants().toList()
println("Total sections: ${allDescendants.size}")