RAG (Retrieval-Augmented Generation) framework for the Embabel Agent platform providing content ingestion, chunking, hierarchical navigation, and semantic search capabilities
Comprehensive search capabilities including vector search, text search, and regex search with filtering support.
Marker interface for search operation implementations.
/**
 * Marker interface for search operation implementations.
 */
interface SearchOperations
Operations that support type checking.
/**
 * Search operations that can report whether a given type is supported.
 */
interface TypeRetrievalOperations : SearchOperations {
    /**
     * Check if a type is supported for retrieval.
     *
     * @param type Type name (label) to check
     * @return true if the type is supported
     */
    fun supportsType(type: String): Boolean
}
Methods:
supportsType(): Check if a type is supported for retrieval
Parameters:
type: Type name (label) to check
Returns: true if type is supported
Find operations by ID.
/**
 * Find operations by ID.
 */
interface FinderOperations : TypeRetrievalOperations {
    /**
     * Find by ID and convert to the given class.
     *
     * @param id Entity identifier
     * @param clazz Expected result class
     * @return Entity, or null if not found
     */
    fun <T> findById(id: String, clazz: Class<T>): T?

    /**
     * Find by ID using a type string.
     *
     * @param id Entity identifier
     * @param type Type name (label)
     * @return Entity, or null if not found
     */
    fun <T : Retrievable> findById(id: String, type: String): T?
}
Methods:
findById(id, clazz): Find by ID and convert to class
findById(id, type): Find by ID using type string
Parameters:
id: Entity identifier
clazz: Expected result class
type: Type name (label)
Returns: Entity or null if not found
Semantic similarity search using vector embeddings.
Basic vector similarity search operations.
/**
 * Basic vector similarity search operations.
 */
interface VectorSearch : TypeRetrievalOperations {
    /**
     * Perform vector similarity search.
     *
     * @param request Search request with query text and parameters
     * @param clazz Class of retrievable items to search
     * @return List of similarity results ordered by relevance
     */
    fun <T : Retrievable> vectorSearch(
        request: TextSimilaritySearchRequest,
        clazz: Class<T>
    ): List<SimilarityResult<T>>
}
Methods:
vectorSearch(): Execute semantic similarity search
Parameters:
request: Search request with query and parameters
clazz: Type of results to return
Returns: List of results sorted by similarity score (descending)
Vector search with metadata and entity filtering.
/**
 * Vector search with metadata and entity filtering.
 */
interface FilteringVectorSearch : VectorSearch {
    /**
     * Perform filtered vector similarity search.
     *
     * @param request Search request with query text and parameters
     * @param clazz Class of retrievable items to search
     * @param metadataFilter Optional property filter for metadata
     * @param entityFilter Optional entity filter for labels
     * @return Filtered list of similarity results
     */
    fun <T : Retrievable> vectorSearchWithFilter(
        request: TextSimilaritySearchRequest,
        clazz: Class<T>,
        metadataFilter: PropertyFilter?,
        entityFilter: EntityFilter?
    ): List<SimilarityResult<T>>
}
Methods:
vectorSearchWithFilter(): Execute filtered vector search
Parameters:
request: Search request
clazz: Result type
metadataFilter: Optional filter on metadata properties
entityFilter: Optional filter on entity labels
Returns: Filtered results sorted by similarity
Full-text search operations with Lucene-like syntax support.
Basic full-text search operations.
/**
 * Basic full-text search operations.
 */
interface TextSearch : TypeRetrievalOperations {
    /**
     * Notes about the Lucene syntax supported by this implementation.
     */
    val luceneSyntaxNotes: String

    /**
     * Perform full-text search.
     *
     * @param request Search request with query text and parameters
     * @param clazz Class of retrievable items to search
     * @return List of similarity results ordered by relevance
     */
    fun <T : Retrievable> textSearch(
        request: TextSimilaritySearchRequest,
        clazz: Class<T>
    ): List<SimilarityResult<T>>
}
Properties:
luceneSyntaxNotes: Description of supported Lucene syntax
Methods:
textSearch(): Execute full-text search
Parameters:
request: Search request with Lucene query
clazz: Result type
Returns: List of results sorted by relevance
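Query Syntax: Supports Lucene-like query syntax; the usage examples in this document use boolean operators (AND, OR, NOT) and parenthesized grouping. Check luceneSyntaxNotes for the syntax a given implementation actually supports.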
Text search with metadata and entity filtering.
/**
 * Text search with metadata and entity filtering.
 */
interface FilteringTextSearch : TextSearch {
    /**
     * Perform filtered full-text search.
     *
     * @param request Search request with query text and parameters
     * @param clazz Class of retrievable items to search
     * @param metadataFilter Optional property filter for metadata
     * @param entityFilter Optional entity filter for labels
     * @return Filtered list of similarity results
     */
    fun <T : Retrievable> textSearchWithFilter(
        request: TextSimilaritySearchRequest,
        clazz: Class<T>,
        metadataFilter: PropertyFilter?,
        entityFilter: EntityFilter?
    ): List<SimilarityResult<T>>
}
Methods:
textSearchWithFilter(): Execute filtered text search
Parameters:
request: Search request with Lucene query
clazz: Result type
metadataFilter: Optional metadata filter
entityFilter: Optional entity label filter
Returns: Filtered results sorted by relevance
Pattern-based search using regular expressions.
Basic regex search operations.
/**
 * Basic regex search operations.
 */
interface RegexSearchOperations : SearchOperations {
    /**
     * Search using a regular expression pattern.
     *
     * @param regex Regular expression pattern
     * @param topK Maximum number of results to return
     * @param clazz Class of retrievable items to search
     * @return List of matching results
     */
    fun <T : Retrievable> regexSearch(
        regex: Regex,
        topK: Int,
        clazz: Class<T>
    ): List<SimilarityResult<T>>
}
Methods:
regexSearch(): Search using regex pattern
Parameters:
regex: Regular expression to match
topK: Maximum results to return
clazz: Result type
Returns: List of matching results (up to topK)
Regex search with metadata and entity filtering.
/**
 * Regex search with metadata and entity filtering.
 */
interface FilteringRegexSearch : RegexSearchOperations {
    /**
     * Search using a regex pattern with filtering.
     *
     * @param regex Regular expression pattern
     * @param topK Maximum number of results to return
     * @param clazz Class of retrievable items to search
     * @param metadataFilter Optional property filter for metadata
     * @param entityFilter Optional entity filter for labels
     * @return Filtered list of matching results
     */
    fun <T : Retrievable> regexSearchWithFilter(
        regex: Regex,
        topK: Int,
        clazz: Class<T>,
        metadataFilter: PropertyFilter?,
        entityFilter: EntityFilter?
    ): List<SimilarityResult<T>>
}
Methods:
regexSearchWithFilter(): Execute filtered regex search
Parameters:
regex: Regular expression pattern
topK: Maximum results
clazz: Result type
metadataFilter: Optional metadata filter
entityFilter: Optional entity filter
Returns: Filtered matching results (up to topK)
Expand search results with additional context.
Interface for expanding search results with surrounding chunks or parent sections.
/**
 * Interface for expanding search results with surrounding chunks or parent sections.
 */
interface ResultExpander : SearchOperations {
    /**
     * Expand a result to include additional context.
     *
     * @param id ID of the element to expand
     * @param method Expansion method (SEQUENCE or ZOOM_OUT)
     * @param elementsToAdd Number of elements to add
     * @return List of expanded content elements
     */
    fun expandResult(
        id: String,
        method: Method,
        elementsToAdd: Int
    ): List<ContentElement>

    /**
     * Strategy used by [expandResult] to gather context.
     */
    enum class Method {
        /** Expand to previous/next chunks in sequence. */
        SEQUENCE,

        /** Expand to enclosing section. */
        ZOOM_OUT
    }
}
Methods:
expandResult(): Expand a result with context
Parameters:
id: Element ID to expand
method: Expansion strategy (SEQUENCE or ZOOM_OUT)
elementsToAdd: Number of elements to include
Returns: List of content elements providing context
Expansion Methods:
SEQUENCE: Include previous/next chunks in document sequence
ZOOM_OUT: Include parent sections in hierarchy
Commonly implemented combination of search capabilities.
/**
 * Combination of [VectorSearch] and [TextSearch] capabilities in a single type.
 */
interface CoreSearchOperations : VectorSearch, TextSearch
Combines both vector and text search capabilities.
Data classes and interfaces supporting search operations.
Identifier for any Retrievable object.
/**
 * Identifier for any Retrievable object.
 *
 * @property id Unique identifier within type
 * @property type Type/namespace (label)
 */
data class RetrievableIdentifier(
    val id: String,
    val type: String
) {
    companion object {
        /** Create an identifier for a chunk. */
        fun forChunk(id: String): RetrievableIdentifier

        /** Create an identifier for a user. */
        fun forUser(id: String): RetrievableIdentifier

        /** Create an identifier from a retrievable object. */
        fun from(retrievable: Retrievable): RetrievableIdentifier
    }
}
Properties:
id: Unique identifier
type: Type label (e.g., "Chunk", "Person")
Factory Methods:
forChunk(): Create identifier for chunk
forUser(): Create identifier for user
from(): Create from retrievable object
Cluster of similar items.
/**
 * Cluster of similar items.
 *
 * @property anchor Central item in the cluster
 * @property similar Similar items with their scores
 */
data class Cluster<E>(
    val anchor: E,
    val similar: List<SimilarityResult<E>>
)
Properties:
anchor: Central item in cluster
similar: List of similar items with scores
Request parameters for cluster retrieval.
/**
 * Request parameters for cluster retrieval.
 *
 * @property similarityThreshold Minimum similarity (0.0-1.0)
 * @property topK Maximum results per cluster
 * @property vectorIndex Vector index name
 */
data class ClusterRetrievalRequest<E>(
    val similarityThreshold: ZeroToOne = 0.7,
    val topK: Int = 10,
    val vectorIndex: String = "embabel-entity-index"
) {
    /** Copy with a new similarity threshold. */
    fun withSimilarityThreshold(
        similarityThreshold: ZeroToOne
    ): ClusterRetrievalRequest<E>

    /** Copy with a new topK. */
    fun withTopK(topK: Int): ClusterRetrievalRequest<E>
}
Properties:
similarityThreshold: Minimum similarity (0.0-1.0)
topK: Maximum results per cluster
vectorIndex: Vector index name
Methods:
withSimilarityThreshold(): Copy with new threshold
withTopK(): Copy with new topK
Interface for finding clusters of similar items.
/**
 * Interface for finding clusters of similar items.
 */
interface ClusterFinder {
    /**
     * Find clusters of similar items.
     *
     * @param opts Clustering parameters
     * @return List of clusters with anchors and similar items
     */
    fun <E> findClusters(
        opts: ClusterRetrievalRequest<E>
    ): List<Cluster<E>>
}
Methods:
findClusters(): Find clusters of similar items
Parameters:
opts: Clustering parameters
Returns: List of clusters with anchors and similar items
Container for similarity search results.
/**
 * Container for similarity search results.
 */
interface SimilarityResults<R : Retrievable> {
    /** List of similarity results. */
    val results: List<SimilarityResult<out R>>

    companion object {
        /**
         * Create a container from a list of results.
         *
         * @param results Similarity results to wrap
         */
        @JvmStatic
        fun <R : Retrievable> fromList(
            results: List<SimilarityResult<out R>>
        ): SimilarityResults<Retrievable>
    }
}
Properties:
results: List of similarity results
Factory Methods:
fromList(): Create from list of results
Formats search results for display.
/**
 * Formats search results for display.
 */
fun interface RetrievableResultsFormatter {
    /**
     * Format results as a string.
     *
     * @param similarityResults Results to format
     * @return Formatted string representation
     */
    fun formatResults(
        similarityResults: SimilarityResults<out Retrievable>
    ): String
}
Methods:
formatResults(): Format results as string
Parameters:
similarityResults: Results to format
Returns: Formatted string representation
Default formatter implementation.
/**
 * Default formatter implementation.
 */
object SimpleRetrievableResultsFormatter : RetrievableResultsFormatter {
    /**
     * Format the results with the default template.
     *
     * @param similarityResults Results to format
     */
    override fun formatResults(
        similarityResults: SimilarityResults<out Retrievable>
    ): String
}
Methods:
formatResults(): Format with default template
Builders for constructing search operation instances.
Base builder interface for search operations.
/**
 * Base builder interface for search operations.
 *
 * `T` is the type returned by [build]; `THIS` is the type returned by each `with…` method.
 */
interface SearchOperationsBuilder<T, THIS> {
    /** Set the search operations name. */
    fun withName(name: String): THIS

    /** Set the embedding service. */
    fun withEmbeddingService(embeddingService: EmbeddingService): THIS

    /** Set the chunk transformer. */
    fun withChunkTransformer(chunkTransformer: ChunkTransformer): THIS

    /** Construct the search operations instance. */
    fun build(): T
}
Methods:
withName(): Set search operations name
withEmbeddingService(): Set embedding service
withChunkTransformer(): Set chunk transformer
build(): Construct search operations instance
Builder for search operations with ingestion capabilities.
/**
 * Builder for search operations with ingestion capabilities.
 */
interface IngestingSearchOperationsBuilder<T, THIS> :
    SearchOperationsBuilder<T, THIS> {
    /** Set the chunker configuration. */
    fun withChunkerConfig(chunkerConfig: ContentChunker.Config): THIS

    /** Set the content chunker. */
    fun withContentChunker(contentChunker: ContentChunker): THIS
}
Methods:
withChunkerConfig(): Set chunker configuration
withContentChunker(): Set content chunker
import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.Chunk
// Placeholder: replace TODO() with a concrete VectorSearch implementation
val searchOps: VectorSearch = TODO("provide a VectorSearch implementation")
// Simple vector search
val results = searchOps.vectorSearch(
    request = TextSimilaritySearchRequest(
        query = "machine learning algorithms",
        topK = 10,
        similarityThreshold = 0.7
    ),
    clazz = Chunk::class.java
)
// Process results
results.forEach { result ->
    println("Score: ${result.score}")
    println("Content: ${result.content.text}")
    println("---")
}
import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.filter.*
import com.embabel.agent.rag.model.*
// Placeholder: replace TODO() with a concrete FilteringVectorSearch implementation
val searchOps: FilteringVectorSearch = TODO("provide a FilteringVectorSearch implementation")
// Build complex filter
val metadataFilter = PropertyFilter.eq("category", "security")
    .and(PropertyFilter.gte("version", 2.0))
    .and(PropertyFilter.contains("tags", "authentication"))
val entityFilter = EntityFilter.hasAnyLabel("Chunk", "Fact")
// Vector search with filtering
val results = searchOps.vectorSearchWithFilter(
    request = TextSimilaritySearchRequest(
        query = "authentication setup",
        topK = 10,
        similarityThreshold = 0.75
    ),
    clazz = Chunk::class.java,
    metadataFilter = metadataFilter,
    entityFilter = entityFilter
)
println("Found ${results.size} results")
import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.Chunk
// Placeholder: replace TODO() with a concrete TextSearch implementation
val searchOps: TextSearch = TODO("provide a TextSearch implementation")
// Check Lucene syntax support
println("Syntax notes: ${searchOps.luceneSyntaxNotes}")
// Perform text search with Lucene syntax
val results = searchOps.textSearch(
    request = TextSimilaritySearchRequest(
        query = "authentication AND (oauth OR jwt) NOT deprecated",
        topK = 20
    ),
    clazz = Chunk::class.java
)
results.forEach { result ->
    println("${result.score}: ${result.content.text.take(100)}...")
}
import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.filter.*
import com.embabel.agent.rag.model.Chunk
// Placeholder: replace TODO() with a concrete FilteringTextSearch implementation
val searchOps: FilteringTextSearch = TODO("provide a FilteringTextSearch implementation")
// Text search with metadata filter
val results = searchOps.textSearchWithFilter(
    request = TextSimilaritySearchRequest(
        query = "installation OR setup",
        topK = 15
    ),
    clazz = Chunk::class.java,
    metadataFilter = PropertyFilter.eq("category", "tutorial")
        .and(PropertyFilter.ne("status", "deprecated")),
    entityFilter = null
)
import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.Chunk
// Placeholder: replace TODO() with a concrete RegexSearchOperations implementation
val searchOps: RegexSearchOperations = TODO("provide a RegexSearchOperations implementation")
// Search for email addresses
val emailPattern = Regex("[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}")
val results = searchOps.regexSearch(
    regex = emailPattern,
    topK = 50,
    clazz = Chunk::class.java
)
// Search for version numbers
val versionPattern = Regex("v?\\d+\\.\\d+\\.\\d+")
val versionResults = searchOps.regexSearch(
    regex = versionPattern,
    topK = 100,
    clazz = Chunk::class.java
)
import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.filter.*
import com.embabel.agent.rag.model.Chunk
// Placeholder: replace TODO() with a concrete FilteringRegexSearch implementation
val searchOps: FilteringRegexSearch = TODO("provide a FilteringRegexSearch implementation")
// Search for URLs in specific sections
val urlPattern = Regex("https?://[^\\s]+")
val results = searchOps.regexSearchWithFilter(
    regex = urlPattern,
    topK = 30,
    clazz = Chunk::class.java,
    metadataFilter = PropertyFilter.eq("section", "references")
        .and(PropertyFilter.eq("status", "published")),
    entityFilter = null
)
import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.Chunk
// Placeholders: replace TODO() with concrete implementations
val searchOps: ResultExpander = TODO("provide a ResultExpander implementation")
val vectorSearchOps: VectorSearch = TODO("provide a VectorSearch implementation")
// Find a chunk, then expand for more context
val initialResults = vectorSearchOps.vectorSearch(
    request = TextSimilaritySearchRequest("error handling", topK = 1),
    clazz = Chunk::class.java
)
// The search may return nothing, so only expand when there is a hit
val firstResult = initialResults.firstOrNull()
if (firstResult != null) {
    // Expand to include 2 chunks before and after
    val sequenceContext = searchOps.expandResult(
        id = firstResult.content.id,
        method = ResultExpander.Method.SEQUENCE,
        elementsToAdd = 2
    )
    println("Sequence context: ${sequenceContext.size} elements")
    // Zoom out to parent section
    val parentContext = searchOps.expandResult(
        id = firstResult.content.id,
        method = ResultExpander.Method.ZOOM_OUT,
        elementsToAdd = 1
    )
    println("Parent context: ${parentContext.size} elements")
}
import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.*
// Placeholder: replace TODO() with a concrete FinderOperations implementation
val searchOps: FinderOperations = TODO("provide a FinderOperations implementation")
// Find by ID and class
val chunk = searchOps.findById("chunk-123", Chunk::class.java)
if (chunk != null) {
    println("Found chunk: ${chunk.text.take(50)}...")
}
// Find by ID and type string
val retrievable = searchOps.findById<Retrievable>("chunk-123", "Chunk")
// Check if type is supported
if (searchOps.supportsType("Chunk")) {
    println("Chunk type is supported")
}
if (searchOps.supportsType("CustomEntity")) {
    // The type argument is explicit: nothing at this call site lets the compiler infer T
    val entity = searchOps.findById<Retrievable>("entity-456", "CustomEntity")
}
import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.*
// Placeholder: replace TODO() with a concrete ClusterFinder implementation
val clusterFinder: ClusterFinder = TODO("provide a ClusterFinder implementation")
// Find clusters with custom parameters
val clusters = clusterFinder.findClusters(
    ClusterRetrievalRequest<NamedEntityData>()
        .withSimilarityThreshold(0.8)
        .withTopK(15)
)
// Process clusters
clusters.forEach { cluster ->
    println("Cluster anchor: ${cluster.anchor.name}")
    println("Similar items (${cluster.similar.size}):")
    cluster.similar.forEach { result ->
        println(" - ${result.score}: ${result.content.name}")
    }
    println()
}
import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.*
// Use default formatter
val formatter = SimpleRetrievableResultsFormatter
// Placeholder: replace TODO() with a concrete VectorSearch implementation
val searchOps: VectorSearch = TODO("provide a VectorSearch implementation")
val results = searchOps.vectorSearch(
    request = TextSimilaritySearchRequest("kotlin tutorial", topK = 5),
    clazz = Chunk::class.java
)
val similarityResults = SimilarityResults.fromList(results)
val formatted = formatter.formatResults(similarityResults)
println(formatted)
// Custom formatter; the lambda parameter is named `toFormat` so it does not shadow `results` above
val customFormatter = RetrievableResultsFormatter { toFormat ->
    buildString {
        appendLine("=== Search Results (${toFormat.results.size}) ===")
        toFormat.results.forEachIndexed { index, result ->
            appendLine("${index + 1}. Score: ${"%.3f".format(result.score)}")
            appendLine(" ${result.content.infoString()}")
            appendLine()
        }
    }
}
val customFormatted = customFormatter.formatResults(similarityResults)
println(customFormatted)
import com.embabel.agent.rag.service.RetrievableIdentifier
import com.embabel.agent.rag.model.*
// Create identifiers
val chunkId = RetrievableIdentifier.forChunk("chunk-123")
println("Chunk ID: ${chunkId.id}, Type: ${chunkId.type}")
val userId = RetrievableIdentifier.forUser("user-456")
println("User ID: ${userId.id}, Type: ${userId.type}")
// From a retrievable object (placeholder: replace TODO() with a real Chunk)
val chunk: Chunk = TODO("obtain a Chunk")
val identifier = RetrievableIdentifier.from(chunk)
println("Identifier: ${identifier.id} (${identifier.type})")
// Custom identifier
val customId = RetrievableIdentifier("entity-789", "CustomEntity")
import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.filter.*
import com.embabel.agent.rag.model.*
/**
 * Runs a filtered vector search, expands the top hit for context, runs a filtered
 * text search, and prints the combined results de-duplicated by content ID.
 *
 * Kotlin has no `A & B & C` syntax for declaring a value of several interface types,
 * so the requirement that one object offers all three capabilities is expressed with
 * a `where` clause on the type parameter.
 *
 * @param searchOps Object implementing filtered vector search, filtered text search and result expansion
 */
fun <S> combinedSearch(searchOps: S)
    where S : FilteringVectorSearch,
          S : FilteringTextSearch,
          S : ResultExpander {
    // 1. Initial vector search
    val vectorResults = searchOps.vectorSearchWithFilter(
        request = TextSimilaritySearchRequest("database optimization", topK = 10),
        clazz = Chunk::class.java,
        metadataFilter = PropertyFilter.eq("category", "performance"),
        entityFilter = null
    )
    println("Vector search: ${vectorResults.size} results")
    // 2. Expand top result for context
    if (vectorResults.isNotEmpty()) {
        val topResult = vectorResults.first()
        val context = searchOps.expandResult(
            id = topResult.content.id,
            method = ResultExpander.Method.SEQUENCE,
            elementsToAdd = 1
        )
        println("Context: ${context.size} elements")
    }
    // 3. Text search for specific terms
    val textResults = searchOps.textSearchWithFilter(
        request = TextSimilaritySearchRequest(
            query = "indexing AND (performance OR optimization)",
            topK = 10
        ),
        clazz = Chunk::class.java,
        metadataFilter = PropertyFilter.gte("version", 2.0),
        entityFilter = null
    )
    println("Text search: ${textResults.size} results")
    // 4. Format combined results
    val allResults = (vectorResults + textResults).distinctBy { it.content.id }
    val formatted = SimpleRetrievableResultsFormatter.formatResults(
        SimilarityResults.fromList(allResults)
    )
    println(formatted)
}
import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.*
/**
 * Type-safe wrapper around [VectorSearch.vectorSearch] that takes the result class
 * from the reified type argument instead of an explicit `Class` parameter.
 *
 * @param searchOps Vector search implementation to query
 * @param query Query text
 * @param topK Maximum number of results to request (defaults to 10)
 * @return Whatever `vectorSearch` returns for the built request and `T::class.java`
 */
inline fun <reified T : Retrievable> search(
    searchOps: VectorSearch,
    query: String,
    topK: Int = 10
): List<SimilarityResult<T>> {
    val searchRequest = TextSimilaritySearchRequest(query, topK)
    val resultClass = T::class.java
    return searchOps.vectorSearch(request = searchRequest, clazz = resultClass)
}
// Placeholder: replace TODO() with a concrete VectorSearch implementation
val searchOps: VectorSearch = TODO("provide a VectorSearch implementation")
// Type-safe searches
val chunks = search<Chunk>(searchOps, "kotlin tutorial")
val facts = search<Fact>(searchOps, "kotlin features")
// Process with type safety
chunks.forEach { result ->
    val chunk: Chunk = result.content
    println("Chunk: ${chunk.text}")
}