RAG (Retrieval-Augmented Generation) framework for the Embabel Agent platform providing content ingestion, chunking, hierarchical navigation, and semantic search capabilities
—
Comprehensive search capabilities including vector search, text search, and regex search with filtering support.
Marker interface for search operation implementations.
interface SearchOperations

Operations that support type checking.
interface TypeRetrievalOperations : SearchOperations {
fun supportsType(type: String): Boolean
}

Methods:
- supportsType(): Check if a type is supported for retrieval

Parameters:
- type: Type name (label) to check

Returns: true if type is supported
Find operations by ID.
interface FinderOperations : TypeRetrievalOperations {
fun <T> findById(id: String, clazz: Class<T>): T?
fun <T : Retrievable> findById(id: String, type: String): T?
}

Methods:
- findById(id, clazz): Find by ID and convert to class
- findById(id, type): Find by ID using type string

Parameters:
- id: Entity identifier
- clazz: Expected result class
- type: Type name (label)

Returns: Entity or null if not found
Semantic similarity search using vector embeddings.
Basic vector similarity search operations.
interface VectorSearch : TypeRetrievalOperations {
/**
* Perform vector similarity search
* @param request Search request with query text and parameters
* @param clazz Class of retrievable items to search
* @return List of similarity results ordered by relevance
*/
fun <T : Retrievable> vectorSearch(
request: TextSimilaritySearchRequest,
clazz: Class<T>
): List<SimilarityResult<T>>
}

Methods:
- vectorSearch(): Execute semantic similarity search

Parameters:
- request: Search request with query and parameters
- clazz: Type of results to return

Returns: List of results sorted by similarity score (descending)
Vector search with metadata and entity filtering.
interface FilteringVectorSearch : VectorSearch {
/**
* Perform filtered vector similarity search
* @param request Search request with query text and parameters
* @param clazz Class of retrievable items to search
* @param metadataFilter Optional property filter for metadata
* @param entityFilter Optional entity filter for labels
* @return Filtered list of similarity results
*/
fun <T : Retrievable> vectorSearchWithFilter(
request: TextSimilaritySearchRequest,
clazz: Class<T>,
metadataFilter: PropertyFilter?,
entityFilter: EntityFilter?
): List<SimilarityResult<T>>
}

Methods:
- vectorSearchWithFilter(): Execute filtered vector search

Parameters:
- request: Search request
- clazz: Result type
- metadataFilter: Optional filter on metadata properties
- entityFilter: Optional filter on entity labels

Returns: Filtered results sorted by similarity
Full-text search operations with Lucene-like syntax support.
Basic full-text search operations.
interface TextSearch : TypeRetrievalOperations {
/**
* Notes about the Lucene syntax supported by this implementation
*/
val luceneSyntaxNotes: String
/**
* Perform full-text search
* @param request Search request with query text and parameters
* @param clazz Class of retrievable items to search
* @return List of similarity results ordered by relevance
*/
fun <T : Retrievable> textSearch(
request: TextSimilaritySearchRequest,
clazz: Class<T>
): List<SimilarityResult<T>>
}

Properties:
- luceneSyntaxNotes: Description of supported Lucene syntax

Methods:
- textSearch(): Execute full-text search

Parameters:
- request: Search request with Lucene query
- clazz: Result type

Returns: List of results sorted by relevance
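Query Syntax: Supports Lucene-like query syntax, including boolean operators (AND, OR, NOT) and parenthesized grouping, for example: authentication AND (oauth OR jwt) NOT deprecated. Consult luceneSyntaxNotes for the syntax a particular implementation supports.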
Text search with metadata and entity filtering.
interface FilteringTextSearch : TextSearch {
/**
* Perform filtered full-text search
* @param request Search request with query text and parameters
* @param clazz Class of retrievable items to search
* @param metadataFilter Optional property filter for metadata
* @param entityFilter Optional entity filter for labels
* @return Filtered list of similarity results
*/
fun <T : Retrievable> textSearchWithFilter(
request: TextSimilaritySearchRequest,
clazz: Class<T>,
metadataFilter: PropertyFilter?,
entityFilter: EntityFilter?
): List<SimilarityResult<T>>
}

Methods:
- textSearchWithFilter(): Execute filtered text search

Parameters:
- request: Search request with Lucene query
- clazz: Result type
- metadataFilter: Optional metadata filter
- entityFilter: Optional entity label filter

Returns: Filtered results sorted by relevance
Pattern-based search using regular expressions.
Basic regex search operations.
interface RegexSearchOperations : SearchOperations {
/**
* Search using regular expression pattern
* @param regex Regular expression pattern
* @param topK Maximum number of results to return
* @param clazz Class of retrievable items to search
* @return List of matching results
*/
fun <T : Retrievable> regexSearch(
regex: Regex,
topK: Int,
clazz: Class<T>
): List<SimilarityResult<T>>
}

Methods:
- regexSearch(): Search using regex pattern

Parameters:
- regex: Regular expression to match
- topK: Maximum results to return
- clazz: Result type

Returns: List of matching results (up to topK)
Regex search with metadata and entity filtering.
interface FilteringRegexSearch : RegexSearchOperations {
/**
* Search using regex pattern with filtering
* @param regex Regular expression pattern
* @param topK Maximum number of results to return
* @param clazz Class of retrievable items to search
* @param metadataFilter Optional property filter for metadata
* @param entityFilter Optional entity filter for labels
* @return Filtered list of matching results
*/
fun <T : Retrievable> regexSearchWithFilter(
regex: Regex,
topK: Int,
clazz: Class<T>,
metadataFilter: PropertyFilter?,
entityFilter: EntityFilter?
): List<SimilarityResult<T>>
}

Methods:
- regexSearchWithFilter(): Execute filtered regex search

Parameters:
- regex: Regular expression pattern
- topK: Maximum results
- clazz: Result type
- metadataFilter: Optional metadata filter
- entityFilter: Optional entity filter

Returns: Filtered matching results (up to topK)
Expand search results with additional context.
Interface for expanding search results with surrounding chunks or parent sections.
interface ResultExpander : SearchOperations {
/**
* Expand a result to include additional context
* @param id ID of the element to expand
* @param method Expansion method (SEQUENCE or ZOOM_OUT)
* @param elementsToAdd Number of elements to add
* @return List of expanded content elements
*/
fun expandResult(
id: String,
method: Method,
elementsToAdd: Int
): List<ContentElement>
enum class Method {
SEQUENCE, // Expand to previous/next chunks in sequence
ZOOM_OUT // Expand to enclosing section
}
}

Methods:
- expandResult(): Expand a result with context

Parameters:
- id: Element ID to expand
- method: Expansion strategy (SEQUENCE or ZOOM_OUT)
- elementsToAdd: Number of elements to include

Returns: List of content elements providing context
Expansion Methods:
- SEQUENCE: Include previous/next chunks in document sequence
- ZOOM_OUT: Include parent sections in hierarchy

Commonly implemented combination of search capabilities.
interface CoreSearchOperations : VectorSearch, TextSearch

Combines both vector and text search capabilities.
Data classes and interfaces supporting search operations.
Identifier for any Retrievable object.
data class RetrievableIdentifier(
val id: String, // Unique identifier within type
val type: String // Type/namespace (label)
) {
companion object {
fun forChunk(id: String): RetrievableIdentifier
fun forUser(id: String): RetrievableIdentifier
fun from(retrievable: Retrievable): RetrievableIdentifier
}
}

Properties:
- id: Unique identifier
- type: Type label (e.g., "Chunk", "Person")

Factory Methods:
- forChunk(): Create identifier for chunk
- forUser(): Create identifier for user
- from(): Create from retrievable object

Cluster of similar items.
data class Cluster<E>(
val anchor: E,
val similar: List<SimilarityResult<E>>
)

Properties:
- anchor: Central item in cluster
- similar: List of similar items with scores

Request parameters for cluster retrieval.
data class ClusterRetrievalRequest<E>(
val similarityThreshold: ZeroToOne = 0.7,
val topK: Int = 10,
val vectorIndex: String = "embabel-entity-index"
) {
fun withSimilarityThreshold(
similarityThreshold: ZeroToOne
): ClusterRetrievalRequest<E>
fun withTopK(topK: Int): ClusterRetrievalRequest<E>
}

Properties:
- similarityThreshold: Minimum similarity (0.0-1.0)
- topK: Maximum results per cluster
- vectorIndex: Vector index name

Methods:
- withSimilarityThreshold(): Copy with new threshold
- withTopK(): Copy with new topK

Interface for finding clusters of similar items.
interface ClusterFinder {
fun <E> findClusters(
opts: ClusterRetrievalRequest<E>
): List<Cluster<E>>
}

Methods:
- findClusters(): Find clusters of similar items

Parameters:
- opts: Clustering parameters

Returns: List of clusters with anchors and similar items
Container for similarity search results.
interface SimilarityResults<R : Retrievable> {
val results: List<SimilarityResult<out R>>
companion object {
@JvmStatic
fun <R : Retrievable> fromList(
results: List<SimilarityResult<out R>>
): SimilarityResults<Retrievable>
}
}

Properties:
- results: List of similarity results

Factory Methods:
- fromList(): Create from list of results

Formats search results for display.
fun interface RetrievableResultsFormatter {
fun formatResults(
similarityResults: SimilarityResults<out Retrievable>
): String
}

Methods:
- formatResults(): Format results as string

Parameters:
- similarityResults: Results to format

Returns: Formatted string representation
Default formatter implementation.
object SimpleRetrievableResultsFormatter : RetrievableResultsFormatter {
override fun formatResults(
similarityResults: SimilarityResults<out Retrievable>
): String
}

Methods:
- formatResults(): Format with default template

Builders for constructing search operation instances.
Base builder interface for search operations.
interface SearchOperationsBuilder<T, THIS> {
fun withName(name: String): THIS
fun withEmbeddingService(embeddingService: EmbeddingService): THIS
fun withChunkTransformer(chunkTransformer: ChunkTransformer): THIS
fun build(): T
}

Methods:
- withName(): Set search operations name
- withEmbeddingService(): Set embedding service
- withChunkTransformer(): Set chunk transformer
- build(): Construct search operations instance

Builder for search operations with ingestion capabilities.
interface IngestingSearchOperationsBuilder<T, THIS> :
SearchOperationsBuilder<T, THIS> {
fun withChunkerConfig(chunkerConfig: ContentChunker.Config): THIS
fun withContentChunker(contentChunker: ContentChunker): THIS
}

Methods:
- withChunkerConfig(): Set chunker configuration
- withContentChunker(): Set content chunker

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.Chunk
val searchOps: VectorSearch = // implementation
// Simple vector search
val results = searchOps.vectorSearch(
request = TextSimilaritySearchRequest(
query = "machine learning algorithms",
topK = 10,
similarityThreshold = 0.7
),
clazz = Chunk::class.java
)
// Process results
results.forEach { result ->
println("Score: ${result.score}")
println("Content: ${result.content.text}")
println("---")
}

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.filter.*
import com.embabel.agent.rag.model.*
val searchOps: FilteringVectorSearch = // implementation
// Build complex filter
val metadataFilter = PropertyFilter.eq("category", "security")
.and(PropertyFilter.gte("version", 2.0))
.and(PropertyFilter.contains("tags", "authentication"))
val entityFilter = EntityFilter.hasAnyLabel("Chunk", "Fact")
// Vector search with filtering
val results = searchOps.vectorSearchWithFilter(
request = TextSimilaritySearchRequest(
query = "authentication setup",
topK = 10,
similarityThreshold = 0.75
),
clazz = Chunk::class.java,
metadataFilter = metadataFilter,
entityFilter = entityFilter
)
println("Found ${results.size} results")

import com.embabel.agent.rag.service.*
val searchOps: TextSearch = // implementation
// Check Lucene syntax support
println("Syntax notes: ${searchOps.luceneSyntaxNotes}")
// Perform text search with Lucene syntax
val results = searchOps.textSearch(
request = TextSimilaritySearchRequest(
query = "authentication AND (oauth OR jwt) NOT deprecated",
topK = 20
),
clazz = Chunk::class.java
)
results.forEach { result ->
println("${result.score}: ${result.content.text.take(100)}...")
}

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.filter.*
val searchOps: FilteringTextSearch = // implementation
// Text search with metadata filter
val results = searchOps.textSearchWithFilter(
request = TextSimilaritySearchRequest(
query = "installation OR setup",
topK = 15
),
clazz = Chunk::class.java,
metadataFilter = PropertyFilter.eq("category", "tutorial")
.and(PropertyFilter.ne("status", "deprecated")),
entityFilter = null
)

import com.embabel.agent.rag.service.*
val searchOps: RegexSearchOperations = // implementation
// Search for email addresses
val emailPattern = Regex("[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}")
val results = searchOps.regexSearch(
regex = emailPattern,
topK = 50,
clazz = Chunk::class.java
)
// Search for version numbers
val versionPattern = Regex("v?\\d+\\.\\d+\\.\\d+")
val versionResults = searchOps.regexSearch(
regex = versionPattern,
topK = 100,
clazz = Chunk::class.java
)

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.filter.*
val searchOps: FilteringRegexSearch = // implementation
// Search for URLs in specific sections
val urlPattern = Regex("https?://[^\\s]+")
val results = searchOps.regexSearchWithFilter(
regex = urlPattern,
topK = 30,
clazz = Chunk::class.java,
metadataFilter = PropertyFilter.eq("section", "references")
.and(PropertyFilter.eq("status", "published")),
entityFilter = null
)

import com.embabel.agent.rag.service.*
val searchOps: ResultExpander = // implementation
val vectorSearchOps: VectorSearch = // implementation
// Find a chunk, then expand for more context
val initialResults = vectorSearchOps.vectorSearch(
    request = TextSimilaritySearchRequest("error handling", topK = 1),
    clazz = Chunk::class.java
)
// The search may match nothing; first() would throw on an empty list,
// so take the top hit with firstOrNull() and only expand when one exists.
val firstResult = initialResults.firstOrNull()
if (firstResult != null) {
    // Expand to include 2 chunks before and after
    val sequenceContext = searchOps.expandResult(
        id = firstResult.content.id,
        method = ResultExpander.Method.SEQUENCE,
        elementsToAdd = 2
    )
    println("Sequence context: ${sequenceContext.size} elements")
    // Zoom out to parent section
    val parentContext = searchOps.expandResult(
        id = firstResult.content.id,
        method = ResultExpander.Method.ZOOM_OUT,
        elementsToAdd = 1
    )
    println("Parent context: ${parentContext.size} elements")
}

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.*
val searchOps: FinderOperations = // implementation
// Find by ID and class: T is inferred from the Class<T> argument
val chunk = searchOps.findById("chunk-123", Chunk::class.java)
if (chunk != null) {
    println("Found chunk: ${chunk.text.take(50)}...")
}
// Find by ID and type string: T appears only in the return type,
// so it has to be supplied explicitly at the call site
val retrievable = searchOps.findById<Retrievable>("chunk-123", "Chunk")
// Check if type is supported
if (searchOps.supportsType("Chunk")) {
    println("Chunk type is supported")
}
if (searchOps.supportsType("CustomEntity")) {
    // Explicit type argument, as above; without it T cannot be inferred
    val entity = searchOps.findById<Retrievable>("entity-456", "CustomEntity")
}

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.*
val clusterFinder: ClusterFinder = // implementation
// Find clusters with custom parameters
val clusters = clusterFinder.findClusters(
ClusterRetrievalRequest<NamedEntityData>()
.withSimilarityThreshold(0.8)
.withTopK(15)
)
// Process clusters
clusters.forEach { cluster ->
println("Cluster anchor: ${cluster.anchor.name}")
println("Similar items (${cluster.similar.size}):")
cluster.similar.forEach { result ->
println(" - ${result.score}: ${result.content.name}")
}
println()
}

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.*
// Use default formatter
val formatter = SimpleRetrievableResultsFormatter
val searchOps: VectorSearch = // implementation
val results = searchOps.vectorSearch(
request = TextSimilaritySearchRequest("kotlin tutorial", topK = 5),
clazz = Chunk::class.java
)
val similarityResults = SimilarityResults.fromList(results)
val formatted = formatter.formatResults(similarityResults)
println(formatted)
// Custom formatter
val customFormatter = RetrievableResultsFormatter { results ->
buildString {
appendLine("=== Search Results (${results.results.size}) ===")
results.results.forEachIndexed { index, result ->
appendLine("${index + 1}. Score: ${"%.3f".format(result.score)}")
appendLine(" ${result.content.infoString()}")
appendLine()
}
}
}
val customFormatted = customFormatter.formatResults(similarityResults)
println(customFormatted)

import com.embabel.agent.rag.service.RetrievableIdentifier
import com.embabel.agent.rag.model.*
// Create identifiers
val chunkId = RetrievableIdentifier.forChunk("chunk-123")
println("Chunk ID: ${chunkId.id}, Type: ${chunkId.type}")
val userId = RetrievableIdentifier.forUser("user-456")
println("User ID: ${userId.id}, Type: ${userId.type}")
// From a retrievable object
val chunk: Chunk = // ...
val identifier = RetrievableIdentifier.from(chunk)
println("Identifier: ${identifier.id} (${identifier.type})")
// Custom identifier
val customId = RetrievableIdentifier("entity-789", "CustomEntity")

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.filter.*
import com.embabel.agent.rag.model.*
/**
 * Runs a filtered vector search, expands the top hit for context, runs a
 * filtered text search, then prints the combined results de-duplicated by
 * content ID.
 *
 * Kotlin cannot declare a value of an intersection type such as
 * `A & B & C`, so the requirement that one object provides all three
 * capabilities is expressed with a type parameter and a `where` clause.
 *
 * @param searchOps an implementation of [FilteringVectorSearch],
 *   [FilteringTextSearch] and [ResultExpander]
 */
fun <S> runCombinedSearch(searchOps: S)
    where S : FilteringVectorSearch,
          S : FilteringTextSearch,
          S : ResultExpander {
    // 1. Initial vector search
    val vectorResults = searchOps.vectorSearchWithFilter(
        request = TextSimilaritySearchRequest("database optimization", topK = 10),
        clazz = Chunk::class.java,
        metadataFilter = PropertyFilter.eq("category", "performance"),
        entityFilter = null
    )
    println("Vector search: ${vectorResults.size} results")
    // 2. Expand top result for context
    if (vectorResults.isNotEmpty()) {
        val topResult = vectorResults.first()
        val context = searchOps.expandResult(
            id = topResult.content.id,
            method = ResultExpander.Method.SEQUENCE,
            elementsToAdd = 1
        )
        println("Context: ${context.size} elements")
    }
    // 3. Text search for specific terms
    val textResults = searchOps.textSearchWithFilter(
        request = TextSimilaritySearchRequest(
            query = "indexing AND (performance OR optimization)",
            topK = 10
        ),
        clazz = Chunk::class.java,
        metadataFilter = PropertyFilter.gte("version", 2.0),
        entityFilter = null
    )
    println("Text search: ${textResults.size} results")
    // 4. Format combined results
    val allResults = (vectorResults + textResults).distinctBy { it.content.id }
    val formatted = SimpleRetrievableResultsFormatter.formatResults(
        SimilarityResults.fromList(allResults)
    )
    println(formatted)
}

val searchOps = // implementation providing all three capabilities
runCombinedSearch(searchOps)

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.*
/**
 * Type-safe convenience wrapper around [VectorSearch.vectorSearch].
 *
 * The reified type parameter supplies the `Class<T>` token, so callers write
 * `search<Chunk>(...)` instead of passing `Chunk::class.java`.
 *
 * @param searchOps vector search implementation to query
 * @param query query text placed in the search request
 * @param topK value forwarded as the request's `topK` (defaults to 10)
 * @return the results returned by [VectorSearch.vectorSearch] for type [T]
 */
inline fun <reified T : Retrievable> search(
    searchOps: VectorSearch,
    query: String,
    topK: Int = 10
): List<SimilarityResult<T>> =
    searchOps.vectorSearch(
        // Named arguments, as in the other examples, so the call does not
        // depend on the request's parameter order.
        request = TextSimilaritySearchRequest(query = query, topK = topK),
        clazz = T::class.java
    )
val searchOps: VectorSearch = // implementation
// Type-safe searches
val chunks = search<Chunk>(searchOps, "kotlin tutorial")
val facts = search<Fact>(searchOps, "kotlin features")
// Process with type safety
chunks.forEach { result ->
val chunk: Chunk = result.content
println("Chunk: ${chunk.text}")
}