RAG (Retrieval-Augmented Generation) framework for the Embabel Agent platform, providing content ingestion, chunking, hierarchical navigation, and semantic search capabilities.
Transform and enrich chunks during ingestion with support for text modification, metadata enrichment, and transformation chaining.
Base interface for transforming chunks during ingestion.
/**
 * Contract for a single step that transforms a [Chunk] during ingestion.
 */
interface ChunkTransformer {

    /** Name used to identify this transformer. */
    val name: String

    /**
     * Produces the transformed form of [chunk].
     *
     * @param chunk the chunk to transform
     * @param context transformation context carrying section and document info
     * @return the transformed chunk
     */
    fun transform(chunk: Chunk, context: ChunkTransformationContext): Chunk

    companion object {

        /** No-operation transformer: chunks pass through unchanged. */
        @JvmField
        val NO_OP: ChunkTransformer
    }
}
Properties:
name: Identifier for the transformer

Methods:
transform(): Transform a chunk
chunk: Chunk to transform
context: Contextual information

Constants:
NO_OP: No-operation transformer (identity function)

Context information available during chunk transformation.
/**
 * Context handed to a transformer together with the chunk being transformed.
 */
data class ChunkTransformationContext(
    /** The section that contains the chunk. */
    val section: Section,
    /** The document root when one is available, otherwise `null`. */
    val document: ContentRoot?
)
Properties:
section: Parent section of chunk
document: Document root (may be null)

Use Cases:

Base class for implementing chunk transformers.
/**
 * Base class for chunk transformers. Subclasses override [additionalMetadata],
 * [newText], or both; [transform] itself is final.
 */
abstract class AbstractChunkTransformer : ChunkTransformer {

    /**
     * Hook for contributing extra metadata; the default contributes an empty map.
     *
     * @param chunk the chunk being transformed
     * @param context transformation context
     * @return metadata entries to add
     */
    open fun additionalMetadata(chunk: Chunk, context: ChunkTransformationContext): Map<String, Any> =
        emptyMap()

    /**
     * Hook for replacing the chunk text; the default returns the chunk's current text.
     *
     * @param chunk the chunk being transformed
     * @param context transformation context
     * @return the text to use for the chunk
     */
    open fun newText(chunk: Chunk, context: ChunkTransformationContext): String =
        chunk.text

    /**
     * Final transform implementation that applies the metadata and text transformations.
     */
    final override fun transform(chunk: Chunk, context: ChunkTransformationContext): Chunk
}
Methods to Override:
additionalMetadata(): Return metadata to add
newText(): Return transformed text

Template Method:
transform(): Final implementation (combines metadata and text)

Usage Pattern:
Extend AbstractChunkTransformer
Override additionalMetadata() and/or newText()

Chain multiple transformers in sequence.
/**
 * [ChunkTransformer] that applies a list of transformers in sequence.
 */
class ChainedChunkTransformer(
    /** Transformers to apply, in order. */
    val transformers: List<ChunkTransformer>
) : ChunkTransformer {

    override val name: String

    /**
     * Runs every transformer in sequence.
     *
     * @param chunk the chunk to transform
     * @param context transformation context
     * @return the fully transformed chunk
     */
    override fun transform(chunk: Chunk, context: ChunkTransformationContext): Chunk

    /**
     * Appends [transformer] to the chain.
     *
     * @param transformer the transformer to append
     * @return a new chained transformer
     */
    fun withTransformer(transformer: ChunkTransformer): ChainedChunkTransformer
}
Constructor:
transformers: Transformers to apply in order

Properties:
name: Combined names of all transformers

Methods:
transform(): Apply all transformers sequentially
withTransformer(): Create new chain with added transformer

Behavior:

Pre-built transformer implementations.

Adds section and document titles to chunk text.
/**
 * Pre-built transformer that adds titles to chunk text.
 */
object AddTitlesChunkTransformer : ChunkTransformer {

    override val name: String

    /**
     * Prepends titles to the text of [chunk].
     *
     * @param chunk the chunk to transform
     * @param context transformation context
     * @return a chunk whose text has the titles added
     */
    override fun transform(chunk: Chunk, context: ChunkTransformationContext): Chunk
}
Behavior:
Example Output:
Document: User Guide
Section: Installation
[original chunk text]

import com.embabel.agent.rag.ingestion.*
// Pass-through setup: the no-op transformer leaves every chunk unchanged
val chunker = ContentChunker(config = ContentChunker.Config(), chunkTransformer = ChunkTransformer.NO_OP)

import com.embabel.agent.rag.ingestion.*
import com.embabel.agent.rag.model.*
// Add metadata only
/**
 * Example transformer that only contributes metadata; it does not override the text hook.
 */
class MetadataEnricher : AbstractChunkTransformer() {

    override val name = "metadata-enricher"

    /**
     * Reports the section title, the document title ("unknown" when there is no
     * document or its title is null), the current time in epoch milliseconds, and
     * the length of the chunk text.
     */
    override fun additionalMetadata(chunk: Chunk, context: ChunkTransformationContext): Map<String, Any> =
        mapOf(
            "section_title" to context.section.title,
            "document_title" to (context.document?.title ?: "unknown"),
            "transformed_at" to System.currentTimeMillis(),
            "text_length" to chunk.text.length
        )
}
// Plug the metadata enricher into the chunker
val chunker = ContentChunker(config = ContentChunker.Config(), chunkTransformer = MetadataEnricher())

import com.embabel.agent.rag.ingestion.*
// Modify chunk text
/**
 * Example transformer that rewrites the chunk text and adds no metadata.
 */
class TextPrefixTransformer : AbstractChunkTransformer() {

    override val name = "text-prefix"

    /** Returns the chunk text preceded by a "Section: <title>" line and a blank line. */
    override fun newText(chunk: Chunk, context: ChunkTransformationContext): String =
        "Section: ${context.section.title}\n\n${chunk.text}"
}

import com.embabel.agent.rag.ingestion.*
/**
 * Example transformer that both attaches metadata and prepends Markdown-style
 * headings to the chunk text.
 */
class ComprehensiveTransformer : AbstractChunkTransformer() {

    override val name = "comprehensive"

    /**
     * Reports the section title, the document title ("unknown" when there is no
     * document or its title is null), and the number of whitespace-separated words
     * in the chunk text.
     */
    override fun additionalMetadata(
        chunk: Chunk,
        context: ChunkTransformationContext
    ): Map<String, Any> {
        // Splitting empty or blank text, or text with leading/trailing whitespace,
        // yields empty tokens; count only the non-empty ones so blank text has 0 words.
        val wordCount = WHITESPACE.split(chunk.text).count { it.isNotEmpty() }
        return mapOf(
            "section" to context.section.title,
            "document" to (context.document?.title ?: "unknown"),
            "word_count" to wordCount
        )
    }

    /**
     * Returns the chunk text preceded by a "# <document title>" heading (only when a
     * document is present) and a "## <section title>" heading, each followed by a
     * blank line.
     */
    override fun newText(
        chunk: Chunk,
        context: ChunkTransformationContext
    ): String {
        val prefix = buildString {
            context.document?.let { doc ->
                appendLine("# ${doc.title}")
                appendLine()
            }
            appendLine("## ${context.section.title}")
            appendLine()
        }
        return prefix + chunk.text
    }

    private companion object {
        /** Word separator, compiled once rather than once per chunk. */
        val WHITESPACE = Regex("\\s+")
    }
}

import com.embabel.agent.rag.ingestion.*
import com.embabel.agent.rag.ingestion.transform.*
// Build the whole chain in one step.
// NOTE(review): CustomTransformer is not defined in the visible part of this
// document; substitute any ChunkTransformer implementation of your own.
val chainedTransformer = ChainedChunkTransformer(
    transformers = listOf(AddTitlesChunkTransformer, MetadataEnricher(), CustomTransformer())
)

// Hand the chain to the chunker
val chunker = ContentChunker(config = ContentChunker.Config(), chunkTransformer = chainedTransformer)

// Alternative: start from a single transformer and append the rest one at a time
val builtChain = ChainedChunkTransformer(listOf(AddTitlesChunkTransformer))
    .withTransformer(MetadataEnricher())
    .withTransformer(CustomTransformer())

import com.embabel.agent.rag.ingestion.*
import com.embabel.agent.rag.ingestion.transform.*
// Use the pre-built title transformer.
val chunker = ContentChunker(
    config = ContentChunker.Config(),
    chunkTransformer = AddTitlesChunkTransformer
)
// Chunks will have section titles prepended

// This import was fused onto the end of the comment above, which commented it out.
import com.embabel.agent.rag.ingestion.*
/**
 * Example transformer that tags each chunk with a script-based language guess.
 */
class LanguageDetector : AbstractChunkTransformer() {

    override val name = "language-detector"

    /**
     * Reports the detected language code and whether that code is "en".
     */
    override fun additionalMetadata(
        chunk: Chunk,
        context: ChunkTransformationContext
    ): Map<String, Any> {
        val language = detectLanguage(chunk.text)
        return mapOf(
            "language" to language,
            "is_english" to (language == "en")
        )
    }

    /**
     * Guesses a language code from the Unicode scripts present in [text].
     *
     * Kana and Hangul are tested before Han because Japanese text routinely
     * contains Han characters (kanji); testing Han first would label such text
     * "zh". Text containing none of these scripts is reported as "en".
     */
    private fun detectLanguage(text: String): String = when {
        KANA.containsMatchIn(text) -> "ja"
        HANGUL.containsMatchIn(text) -> "ko"
        HAN.containsMatchIn(text) -> "zh"
        else -> "en"
    }

    private companion object {
        // Compiled once instead of on every call.
        val KANA = Regex("[\\p{IsHiragana}\\p{IsKatakana}]")
        val HANGUL = Regex("[\\p{IsHangul}]")
        val HAN = Regex("[\\p{IsHan}]")
    }
}

import com.embabel.agent.rag.ingestion.*
/**
 * Example transformer that tags each chunk with a keyword-based sentiment label.
 */
class SentimentAnalyzer : AbstractChunkTransformer() {

    /** Sentiment label together with its score. */
    data class Sentiment(val name: String, val score: Double)

    override val name = "sentiment-analyzer"

    /** Reports the sentiment label and score computed for the chunk text. */
    override fun additionalMetadata(chunk: Chunk, context: ChunkTransformationContext): Map<String, Any> =
        analyzeSentiment(chunk.text).let { result ->
            mapOf(
                "sentiment" to result.name,
                "sentiment_score" to result.score
            )
        }

    /**
     * Compares how many positive keywords and how many negative keywords occur in
     * the lower-cased text. Each keyword counts at most once and is matched as a
     * substring, so it also matches inside longer words.
     */
    private fun analyzeSentiment(text: String): Sentiment {
        val haystack = text.lowercase()
        val upbeat = listOf("good", "great", "excellent", "success").count { it in haystack }
        val downbeat = listOf("bad", "error", "fail", "problem").count { it in haystack }
        if (upbeat > downbeat) return Sentiment("positive", 0.7)
        if (downbeat > upbeat) return Sentiment("negative", -0.7)
        return Sentiment("neutral", 0.0)
    }
}

import com.embabel.agent.rag.ingestion.*
/**
 * Wraps another transformer and applies it only when [condition] holds for the
 * chunk and its context; otherwise the chunk is returned as-is.
 */
class ConditionalTransformer(
    private val condition: (Chunk, ChunkTransformationContext) -> Boolean,
    private val transformer: ChunkTransformer
) : ChunkTransformer {

    /** The wrapped transformer's name prefixed with "conditional-". */
    override val name = "conditional-${transformer.name}"

    override fun transform(chunk: Chunk, context: ChunkTransformationContext): Chunk {
        // Guard: leave the chunk untouched when the predicate rejects it.
        if (!condition(chunk, context)) return chunk
        return transformer.transform(chunk, context)
    }
}
// Titles are added only when the chunk text is longer than 500 characters
val lengthBased = ConditionalTransformer(
    condition = { candidate, _ -> candidate.text.length > 500 },
    transformer = AddTitlesChunkTransformer
)
urtext property
See the source documentation for more comprehensive usage examples.