RAG (Retrieval-Augmented Generation) framework for the Embabel Agent platform providing content ingestion, chunking, hierarchical navigation, and semantic search capabilities
—
Transform and enrich chunks during ingestion with support for text modification, metadata enrichment, and transformation chaining.
Base interface for transforming chunks during ingestion.
interface ChunkTransformer {

    /** Name that identifies this transformer. */
    val name: String

    /**
     * Transforms one chunk during ingestion.
     *
     * @param chunk the chunk to transform
     * @param context the section (and document, when available) the chunk belongs to
     * @return the transformed chunk
     */
    fun transform(chunk: Chunk, context: ChunkTransformationContext): Chunk

    companion object {

        /** No-operation transformer: chunks pass through unchanged. */
        @JvmField
        val NO_OP: ChunkTransformer
    }
}
Properties:
name: Identifier for the transformer
Methods:
transform(): Transform a chunk
chunk: Chunk to transform
context: Contextual information
Constants:
NO_OP: No-operation transformer (identity function)
Context information available during chunk transformation.
data class ChunkTransformationContext(
    /** Section that contains the chunk being transformed. */
    val section: Section,

    /** Root of the document the chunk came from; `null` when no document is available. */
    val document: ContentRoot?,
)
Properties:
section: Parent section of chunk
document: Document root (may be null)
Use Cases:
Base class for implementing chunk transformers.
abstract class AbstractChunkTransformer : ChunkTransformer {

    /**
     * Extra metadata to attach to the chunk. Override to contribute custom entries;
     * the default contributes nothing.
     *
     * @param chunk the chunk being transformed
     * @param context the section/document context of the chunk
     * @return metadata entries to add
     */
    open fun additionalMetadata(
        chunk: Chunk,
        context: ChunkTransformationContext,
    ): Map<String, Any> = emptyMap()

    /**
     * Replacement text for the chunk. Override to rewrite the text;
     * the default returns the chunk's existing text.
     *
     * @param chunk the chunk being transformed
     * @param context the section/document context of the chunk
     * @return the text the transformed chunk should carry
     */
    open fun newText(
        chunk: Chunk,
        context: ChunkTransformationContext,
    ): String = chunk.text

    /**
     * Template method: applies the metadata and text transformations.
     * It is final, so subclasses customise behaviour through
     * [additionalMetadata] and [newText] instead.
     */
    final override fun transform(
        chunk: Chunk,
        context: ChunkTransformationContext,
    ): Chunk
}
Methods to Override:
additionalMetadata(): Return metadata to add
newText(): Return transformed text
Template Method:
transform(): Final implementation (combines metadata and text)
Usage Pattern:
Extend AbstractChunkTransformer and override additionalMetadata() and/or newText()
Chain multiple transformers in sequence.
class ChainedChunkTransformer(
    /** Transformers to apply, in order. */
    val transformers: List<ChunkTransformer>,
) : ChunkTransformer {

    /** Combined names of all transformers in the chain. */
    override val name: String

    /**
     * Runs the chunk through every transformer in sequence.
     *
     * @param chunk the chunk to transform
     * @param context the section/document context of the chunk
     * @return the fully transformed chunk
     */
    override fun transform(chunk: Chunk, context: ChunkTransformationContext): Chunk

    /**
     * Creates a new chain with one more transformer appended.
     *
     * @param transformer the transformer to append
     * @return the new chained transformer
     */
    fun withTransformer(transformer: ChunkTransformer): ChainedChunkTransformer
}
Constructor:
transformers: Transformers to apply in order
Properties:
name: Combined names of all transformers
Methods:
transform(): Apply all transformers sequentially
withTransformer(): Create new chain with added transformer
Behavior:
Pre-built transformer implementations.
Adds section and document titles to chunk text.
object AddTitlesChunkTransformer : ChunkTransformer {

    /** Name that identifies this transformer. */
    override val name: String

    /**
     * Prepends titles to the chunk text.
     *
     * @param chunk the chunk to transform
     * @param context the section/document context the titles are taken from
     * @return a chunk whose text has the titles added
     */
    override fun transform(chunk: Chunk, context: ChunkTransformationContext): Chunk
}
Behavior:
Example Output:
Document: User Guide
Section: Installation
[original chunk text]
import com.embabel.agent.rag.ingestion.*
// ChunkTransformer.NO_OP leaves every chunk exactly as the chunker produced it
val chunker = ContentChunker(
    config = ContentChunker.Config(),
    chunkTransformer = ChunkTransformer.NO_OP,
)
import com.embabel.agent.rag.ingestion.*
import com.embabel.agent.rag.model.*
// Add metadata only
/**
 * Example transformer that leaves the chunk text alone and only contributes metadata:
 * the section title, the document title ("unknown" when there is no document),
 * the current time in epoch milliseconds, and the length of the chunk text.
 */
class MetadataEnricher : AbstractChunkTransformer() {

    override val name = "metadata-enricher"

    override fun additionalMetadata(
        chunk: Chunk,
        context: ChunkTransformationContext,
    ): Map<String, Any> = mapOf(
        "section_title" to context.section.title,
        "document_title" to (context.document?.title ?: "unknown"),
        "transformed_at" to System.currentTimeMillis(),
        "text_length" to chunk.text.length,
    )
}
// Plug the metadata-only transformer into the chunker
val chunker = ContentChunker(
    config = ContentChunker.Config(),
    chunkTransformer = MetadataEnricher(),
)
import com.embabel.agent.rag.ingestion.*
// Modify chunk text
/**
 * Example transformer that rewrites only the text: the chunk text is prefixed
 * with a "Section: <title>" line followed by a blank line.
 */
class TextPrefixTransformer : AbstractChunkTransformer() {

    override val name = "text-prefix"

    override fun newText(
        chunk: Chunk,
        context: ChunkTransformationContext,
    ): String = "Section: ${context.section.title}\n\n${chunk.text}"
}
import com.embabel.agent.rag.ingestion.*
/**
 * Example transformer that overrides both hooks: it adds section/document/word-count
 * metadata and prefixes the chunk text with Markdown-style headings.
 */
class ComprehensiveTransformer : AbstractChunkTransformer() {

    override val name = "comprehensive"

    /**
     * Contributes the section title, the document title ("unknown" when there is
     * no document) and the number of whitespace-separated words in the chunk text.
     *
     * @param chunk the chunk being transformed
     * @param context the section/document context of the chunk
     * @return metadata entries to add
     */
    override fun additionalMetadata(
        chunk: Chunk,
        context: ChunkTransformationContext,
    ): Map<String, Any> = mapOf(
        "section" to context.section.title,
        "document" to (context.document?.title ?: "unknown"),
        // split() yields an empty token for empty text and for leading/trailing
        // whitespace; those are not words, so they must not be counted.
        "word_count" to chunk.text.split(WHITESPACE).count { it.isNotEmpty() },
    )

    /**
     * Builds the new text: an optional "# <document title>" heading, then a
     * "## <section title>" heading, each followed by a blank line, then the
     * original chunk text.
     *
     * @param chunk the chunk being transformed
     * @param context the section/document context of the chunk
     * @return the heading prefix followed by the original text
     */
    override fun newText(
        chunk: Chunk,
        context: ChunkTransformationContext,
    ): String = buildString {
        context.document?.let { doc ->
            appendLine("# ${doc.title}")
            appendLine()
        }
        appendLine("## ${context.section.title}")
        appendLine()
        append(chunk.text)
    }

    private companion object {
        /** Compiled once instead of on every call. */
        val WHITESPACE = Regex("\\s+")
    }
}
import com.embabel.agent.rag.ingestion.*
import com.embabel.agent.rag.ingestion.transform.*
// Build the whole chain up front; the transformers are applied in list order.
// NOTE(review): CustomTransformer is not defined in the visible text — substitute your own ChunkTransformer.
val chainedTransformer = ChainedChunkTransformer(
    transformers = listOf(
        AddTitlesChunkTransformer,
        MetadataEnricher(),
        CustomTransformer(),
    ),
)

// Hand the chain to the chunker like any other transformer
val chunker = ContentChunker(
    config = ContentChunker.Config(),
    chunkTransformer = chainedTransformer,
)

// Alternatively, grow the chain one transformer at a time
val builtChain = ChainedChunkTransformer(listOf(AddTitlesChunkTransformer))
    .withTransformer(MetadataEnricher())
    .withTransformer(CustomTransformer())
import com.embabel.agent.rag.ingestion.*
import com.embabel.agent.rag.ingestion.transform.*
// Use the pre-built title transformer directly
val chunker = ContentChunker(
    config = ContentChunker.Config(),
    chunkTransformer = AddTitlesChunkTransformer,
)
// Chunks will have section and document titles prepended
import com.embabel.agent.rag.ingestion.*
/**
 * Example transformer that tags each chunk with a script-based language guess.
 * This is a coarse heuristic: text without kana, Hangul or Han characters is
 * always reported as "en".
 */
class LanguageDetector : AbstractChunkTransformer() {

    override val name = "language-detector"

    /**
     * Contributes the guessed language code and whether it is English.
     *
     * @param chunk the chunk being transformed
     * @param context the section/document context of the chunk (unused)
     * @return the "language" code and the "is_english" flag
     */
    override fun additionalMetadata(
        chunk: Chunk,
        context: ChunkTransformationContext,
    ): Map<String, Any> {
        val language = detectLanguage(chunk.text)
        return mapOf(
            "language" to language,
            "is_english" to (language == "en"),
        )
    }

    /**
     * Guesses the language from the scripts present in [text].
     *
     * The script-specific checks run before the Han check: Japanese text mixes
     * kana with Han characters (kanji) and Korean text may contain hanja, so
     * testing Han first would label both as "zh".
     */
    private fun detectLanguage(text: String): String = when {
        KANA.containsMatchIn(text) -> "ja"
        HANGUL.containsMatchIn(text) -> "ko"
        HAN.containsMatchIn(text) -> "zh"
        else -> "en"
    }

    private companion object {
        // Compiled once instead of on every call.
        val HAN = Regex("[\\p{IsHan}]")
        val KANA = Regex("[\\p{IsHiragana}\\p{IsKatakana}]")
        val HANGUL = Regex("[\\p{IsHangul}]")
    }
}
import com.embabel.agent.rag.ingestion.*
/**
 * Example transformer that attaches a keyword-based sentiment label and score.
 * Keywords are matched as case-insensitive substrings, and each keyword counts
 * at most once no matter how often it occurs.
 */
class SentimentAnalyzer : AbstractChunkTransformer() {

    override val name = "sentiment-analyzer"

    override fun additionalMetadata(
        chunk: Chunk,
        context: ChunkTransformationContext,
    ): Map<String, Any> =
        analyzeSentiment(chunk.text).let { result ->
            mapOf<String, Any>(
                "sentiment" to result.name,
                "sentiment_score" to result.score,
            )
        }

    /** Compares how many positive and negative keywords appear in [text]. */
    private fun analyzeSentiment(text: String): Sentiment {
        val haystack = text.lowercase()
        val positiveHits = listOf("good", "great", "excellent", "success").count { it in haystack }
        val negativeHits = listOf("bad", "error", "fail", "problem").count { it in haystack }
        return when {
            positiveHits > negativeHits -> Sentiment("positive", 0.7)
            positiveHits < negativeHits -> Sentiment("negative", -0.7)
            else -> Sentiment("neutral", 0.0)
        }
    }

    /** Sentiment label together with its score. */
    data class Sentiment(val name: String, val score: Double)
}
import com.embabel.agent.rag.ingestion.*
/**
 * Wraps another transformer and applies it only to chunks for which
 * [condition] returns true; all other chunks are returned unchanged.
 */
class ConditionalTransformer(
    private val condition: (Chunk, ChunkTransformationContext) -> Boolean,
    private val transformer: ChunkTransformer,
) : ChunkTransformer {

    override val name = "conditional-${transformer.name}"

    override fun transform(
        chunk: Chunk,
        context: ChunkTransformationContext,
    ): Chunk {
        // Guard: chunks that fail the predicate bypass the wrapped transformer
        if (!condition(chunk, context)) return chunk
        return transformer.transform(chunk, context)
    }
}
// Add titles only to chunks whose text is longer than 500 characters
val lengthBased = ConditionalTransformer(
    condition = { chunk, _ -> chunk.text.length > 500 },
    transformer = AddTitlesChunkTransformer,
)
urtext property
See the source documentation for more comprehensive usage examples.