CtrlK
Community · Documentation · Log in · Get started
Tessl Logo

tessl/maven-com-embabel-agent--embabel-agent-rag-core

RAG (Retrieval-Augmented Generation) framework for the Embabel Agent platform providing content ingestion, chunking, hierarchical navigation, and semantic search capabilities

Overview
Eval results
Files

docs/api-reference/search-operations.md

Search Operations API Reference

Comprehensive search capabilities including vector search, text search, and regex search with filtering support.

Base Interfaces

SearchOperations

Marker interface for search operation implementations.

/**
 * Marker interface for search operation implementations.
 *
 * It declares no members; the more specific search interfaces extend it.
 */
interface SearchOperations

TypeRetrievalOperations

Operations that can report whether a given type (label) is supported for retrieval.

/**
 * Search operations that can report which types they are able to retrieve.
 */
interface TypeRetrievalOperations : SearchOperations {
    /**
     * Check whether a type is supported for retrieval.
     *
     * @param type Type name (label) to check
     * @return `true` if the type is supported
     */
    fun supportsType(type: String): Boolean
}

Methods:

  • supportsType(): Check if a type is supported for retrieval

Parameters:

  • type: Type name (label) to check

Returns: true if type is supported

FinderOperations

Operations for finding entities by ID.

/**
 * Operations for finding entities by ID.
 */
interface FinderOperations : TypeRetrievalOperations {
    /**
     * Find an entity by ID and convert it to the given class.
     *
     * @param id Entity identifier
     * @param clazz Expected result class
     * @return The entity, or `null` if not found
     */
    fun <T> findById(id: String, clazz: Class<T>): T?
    /**
     * Find an entity by ID using a type string.
     *
     * `T` appears only in the return type, so a call site needs either an
     * explicit type argument or an expected type for the result.
     *
     * @param id Entity identifier
     * @param type Type name (label)
     * @return The entity, or `null` if not found
     */
    fun <T : Retrievable> findById(id: String, type: String): T?
}

Methods:

  • findById(id, clazz): Find by ID and convert to class
  • findById(id, type): Find by ID using type string

Parameters:

  • id: Entity identifier
  • clazz: Expected result class
  • type: Type name (label)

Returns: Entity or null if not found


Vector Search

Semantic similarity search using vector embeddings.

VectorSearch

Basic vector similarity search operations.

/**
 * Basic vector similarity search operations (semantic search using vector embeddings).
 */
interface VectorSearch : TypeRetrievalOperations {
    /**
     * Execute a semantic similarity search.
     *
     * @param request Search request with query text and parameters
     * @param clazz Class of retrievable items to search; also the type of the returned results
     * @return List of results sorted by similarity score (descending)
     */
    fun <T : Retrievable> vectorSearch(
        request: TextSimilaritySearchRequest,
        clazz: Class<T>
    ): List<SimilarityResult<T>>
}

Methods:

  • vectorSearch(): Execute semantic similarity search

Parameters:

  • request: Search request with query and parameters
  • clazz: Type of results to return

Returns: List of results sorted by similarity score (descending)

FilteringVectorSearch

Vector search with metadata and entity filtering.

/**
 * Vector search with metadata and entity filtering.
 */
interface FilteringVectorSearch : VectorSearch {
    /**
     * Execute a filtered vector similarity search.
     *
     * @param request Search request with query text and parameters
     * @param clazz Class of retrievable items to search; also the type of the returned results
     * @param metadataFilter Optional filter on metadata properties; may be `null`
     * @param entityFilter Optional filter on entity labels; may be `null`
     * @return Filtered results sorted by similarity
     */
    fun <T : Retrievable> vectorSearchWithFilter(
        request: TextSimilaritySearchRequest,
        clazz: Class<T>,
        metadataFilter: PropertyFilter?,
        entityFilter: EntityFilter?
    ): List<SimilarityResult<T>>
}

Methods:

  • vectorSearchWithFilter(): Execute filtered vector search

Parameters:

  • request: Search request
  • clazz: Result type
  • metadataFilter: Optional filter on metadata properties
  • entityFilter: Optional filter on entity labels

Returns: Filtered results sorted by similarity


Text Search

Full-text search operations with Lucene-like syntax support.

TextSearch

Basic full-text search operations.

/**
 * Basic full-text search operations with Lucene-like query syntax.
 */
interface TextSearch : TypeRetrievalOperations {
    /**
     * Description of the Lucene syntax supported by this implementation.
     */
    val luceneSyntaxNotes: String

    /**
     * Execute a full-text search.
     *
     * @param request Search request whose query text is a Lucene-style query, plus parameters
     * @param clazz Class of retrievable items to search; also the type of the returned results
     * @return List of results sorted by relevance
     */
    fun <T : Retrievable> textSearch(
        request: TextSimilaritySearchRequest,
        clazz: Class<T>
    ): List<SimilarityResult<T>>
}

Properties:

  • luceneSyntaxNotes: Description of supported Lucene syntax

Methods:

  • textSearch(): Execute full-text search

Parameters:

  • request: Search request with Lucene query
  • clazz: Result type

Returns: List of results sorted by relevance

Query Syntax: Supports Lucene-like query syntax including:

  • AND/OR/NOT operators
  • Field-specific searches
  • Wildcard patterns
  • Phrase queries
  • Proximity searches

FilteringTextSearch

Text search with metadata and entity filtering.

/**
 * Text search with metadata and entity filtering.
 */
interface FilteringTextSearch : TextSearch {
    /**
     * Execute a filtered full-text search.
     *
     * @param request Search request whose query text is a Lucene-style query, plus parameters
     * @param clazz Class of retrievable items to search; also the type of the returned results
     * @param metadataFilter Optional filter on metadata properties; may be `null`
     * @param entityFilter Optional filter on entity labels; may be `null`
     * @return Filtered results sorted by relevance
     */
    fun <T : Retrievable> textSearchWithFilter(
        request: TextSimilaritySearchRequest,
        clazz: Class<T>,
        metadataFilter: PropertyFilter?,
        entityFilter: EntityFilter?
    ): List<SimilarityResult<T>>
}

Methods:

  • textSearchWithFilter(): Execute filtered text search

Parameters:

  • request: Search request with Lucene query
  • clazz: Result type
  • metadataFilter: Optional metadata filter
  • entityFilter: Optional entity label filter

Returns: Filtered results sorted by relevance


Regex Search

Pattern-based search using regular expressions.

RegexSearchOperations

Basic regex search operations.

/**
 * Basic pattern-based search using regular expressions.
 */
interface RegexSearchOperations : SearchOperations {
    /**
     * Search using a regular expression pattern.
     *
     * @param regex Regular expression to match
     * @param topK Maximum number of results to return
     * @param clazz Class of retrievable items to search; also the type of the returned results
     * @return List of matching results (up to `topK`)
     */
    fun <T : Retrievable> regexSearch(
        regex: Regex,
        topK: Int,
        clazz: Class<T>
    ): List<SimilarityResult<T>>
}

Methods:

  • regexSearch(): Search using regex pattern

Parameters:

  • regex: Regular expression to match
  • topK: Maximum results to return
  • clazz: Result type

Returns: List of matching results (up to topK)

FilteringRegexSearch

Regex search with metadata and entity filtering.

/**
 * Regex search with metadata and entity filtering.
 */
interface FilteringRegexSearch : RegexSearchOperations {
    /**
     * Search using a regular expression pattern, with filtering.
     *
     * @param regex Regular expression pattern
     * @param topK Maximum number of results to return
     * @param clazz Class of retrievable items to search; also the type of the returned results
     * @param metadataFilter Optional filter on metadata properties; may be `null`
     * @param entityFilter Optional filter on entity labels; may be `null`
     * @return Filtered matching results (up to `topK`)
     */
    fun <T : Retrievable> regexSearchWithFilter(
        regex: Regex,
        topK: Int,
        clazz: Class<T>,
        metadataFilter: PropertyFilter?,
        entityFilter: EntityFilter?
    ): List<SimilarityResult<T>>
}

Methods:

  • regexSearchWithFilter(): Execute filtered regex search

Parameters:

  • regex: Regular expression pattern
  • topK: Maximum results
  • clazz: Result type
  • metadataFilter: Optional metadata filter
  • entityFilter: Optional entity filter

Returns: Filtered matching results (up to topK)


Result Expansion

Expand search results with additional context.

ResultExpander

Interface for expanding search results with surrounding chunks or parent sections.

/**
 * Expands search results with surrounding chunks or parent sections.
 */
interface ResultExpander : SearchOperations {
    /**
     * Expand a result to include additional context.
     *
     * @param id ID of the element to expand
     * @param method Expansion strategy ([Method.SEQUENCE] or [Method.ZOOM_OUT])
     * @param elementsToAdd Number of elements to add
     * @return List of content elements providing context
     */
    fun expandResult(
        id: String,
        method: Method,
        elementsToAdd: Int
    ): List<ContentElement>

    /**
     * Strategy used by [expandResult] to pick the additional context.
     */
    enum class Method {
        SEQUENCE,   // Include previous/next chunks in document sequence
        ZOOM_OUT    // Include the enclosing (parent) section in the hierarchy
    }
}

Methods:

  • expandResult(): Expand a result with context

Parameters:

  • id: Element ID to expand
  • method: Expansion strategy (SEQUENCE or ZOOM_OUT)
  • elementsToAdd: Number of elements to include

Returns: List of content elements providing context

Expansion Methods:

  • SEQUENCE: Include previous/next chunks in document sequence
  • ZOOM_OUT: Include parent sections in hierarchy

Core Search Operations

Commonly implemented combination of search capabilities.

/**
 * Commonly implemented combination of search capabilities:
 * both vector search and text search.
 */
interface CoreSearchOperations : VectorSearch, TextSearch

Combines both vector and text search capabilities.


Supporting Types

Data classes and interfaces supporting search operations.

RetrievableIdentifier

Identifier for any Retrievable object.

/**
 * Identifier for any Retrievable object.
 *
 * @property id Unique identifier within the type
 * @property type Type/namespace label (e.g., "Chunk", "Person")
 */
data class RetrievableIdentifier(
    val id: String,      // Unique identifier within type
    val type: String     // Type/namespace (label)
) {
    companion object {
        /** Create an identifier for a chunk with the given [id]. */
        fun forChunk(id: String): RetrievableIdentifier
        /** Create an identifier for a user with the given [id]. */
        fun forUser(id: String): RetrievableIdentifier
        /** Create an identifier from an existing retrievable object. */
        fun from(retrievable: Retrievable): RetrievableIdentifier
    }
}

Properties:

  • id: Unique identifier
  • type: Type label (e.g., "Chunk", "Person")

Factory Methods:

  • forChunk(): Create identifier for chunk
  • forUser(): Create identifier for user
  • from(): Create from retrievable object

Cluster

Cluster of similar items.

/**
 * Cluster of similar items.
 *
 * @param E Type of the clustered items
 * @property anchor Central item in the cluster
 * @property similar Items similar to the anchor, each with its score
 */
data class Cluster<E>(
    val anchor: E,
    val similar: List<SimilarityResult<E>>
)

Properties:

  • anchor: Central item in cluster
  • similar: List of similar items with scores

ClusterRetrievalRequest

Request parameters for cluster retrieval.

/**
 * Request parameters for cluster retrieval.
 *
 * `E` does not appear in any property; it only carries the item type through to
 * the clusters produced for this request.
 *
 * @property similarityThreshold Minimum similarity (0.0-1.0)
 * @property topK Maximum results per cluster
 * @property vectorIndex Vector index name
 */
data class ClusterRetrievalRequest<E>(
    val similarityThreshold: ZeroToOne = 0.7,
    val topK: Int = 10,
    val vectorIndex: String = "embabel-entity-index"
) {
    /** Copy of this request with a new similarity threshold. */
    fun withSimilarityThreshold(
        similarityThreshold: ZeroToOne
    ): ClusterRetrievalRequest<E>

    /** Copy of this request with a new `topK`. */
    fun withTopK(topK: Int): ClusterRetrievalRequest<E>
}

Properties:

  • similarityThreshold: Minimum similarity (0.0-1.0)
  • topK: Maximum results per cluster
  • vectorIndex: Vector index name

Methods:

  • withSimilarityThreshold(): Copy with new threshold
  • withTopK(): Copy with new topK

ClusterFinder

Interface for finding clusters of similar items.

/**
 * Finds clusters of similar items.
 */
interface ClusterFinder {
    /**
     * Find clusters of similar items.
     *
     * @param opts Clustering parameters
     * @return List of clusters, each with an anchor and its similar items
     */
    fun <E> findClusters(
        opts: ClusterRetrievalRequest<E>
    ): List<Cluster<E>>
}

Methods:

  • findClusters(): Find clusters of similar items

Parameters:

  • opts: Clustering parameters

Returns: List of clusters with anchors and similar items

SimilarityResults

Container for similarity search results.

/**
 * Container for similarity search results.
 *
 * @param R Upper bound of the retrievable types held in [results]
 */
interface SimilarityResults<R : Retrievable> {
    /** The similarity results held by this container. */
    val results: List<SimilarityResult<out R>>

    companion object {
        /**
         * Create a container from a list of results.
         *
         * NOTE(review): the declared return type is `SimilarityResults<Retrievable>`,
         * not `SimilarityResults<R>`, so the element type is widened for callers —
         * confirm against the library source whether this is intended.
         */
        @JvmStatic
        fun <R : Retrievable> fromList(
            results: List<SimilarityResult<out R>>
        ): SimilarityResults<Retrievable>
    }
}

Properties:

  • results: List of similarity results

Factory Methods:

  • fromList(): Create from list of results

RetrievableResultsFormatter

Formats search results for display.

/**
 * Formats search results for display.
 *
 * Declared as a `fun interface`, so it can be instantiated from a lambda.
 */
fun interface RetrievableResultsFormatter {
    /**
     * Format results as a string.
     *
     * @param similarityResults Results to format
     * @return Formatted string representation
     */
    fun formatResults(
        similarityResults: SimilarityResults<out Retrievable>
    ): String
}

Methods:

  • formatResults(): Format results as string

Parameters:

  • similarityResults: Results to format

Returns: Formatted string representation

SimpleRetrievableResultsFormatter

Default formatter implementation.

/**
 * Default [RetrievableResultsFormatter] implementation, exposed as a singleton object.
 */
object SimpleRetrievableResultsFormatter : RetrievableResultsFormatter {
    /**
     * Format results with the default template.
     *
     * @param similarityResults Results to format
     * @return Formatted string representation
     */
    override fun formatResults(
        similarityResults: SimilarityResults<out Retrievable>
    ): String
}

Methods:

  • formatResults(): Format with default template

Builder Interfaces

Builders for constructing search operation instances.

SearchOperationsBuilder

Base builder interface for search operations.

/**
 * Base builder interface for search operations.
 *
 * @param T Type of the search operations instance produced by [build]
 * @param THIS Type returned by the `with...` methods
 */
interface SearchOperationsBuilder<T, THIS> {
    /** Set the search operations name. */
    fun withName(name: String): THIS
    /** Set the embedding service. */
    fun withEmbeddingService(embeddingService: EmbeddingService): THIS
    /** Set the chunk transformer. */
    fun withChunkTransformer(chunkTransformer: ChunkTransformer): THIS
    /** Construct the search operations instance. */
    fun build(): T
}

Methods:

  • withName(): Set search operations name
  • withEmbeddingService(): Set embedding service
  • withChunkTransformer(): Set chunk transformer
  • build(): Construct search operations instance

IngestingSearchOperationsBuilder

Builder for search operations with ingestion capabilities.

/**
 * Builder for search operations with ingestion capabilities.
 *
 * Adds chunker settings on top of [SearchOperationsBuilder].
 */
interface IngestingSearchOperationsBuilder<T, THIS> :
    SearchOperationsBuilder<T, THIS> {

    /** Set the chunker configuration. */
    fun withChunkerConfig(chunkerConfig: ContentChunker.Config): THIS
    /** Set the content chunker. */
    fun withContentChunker(contentChunker: ContentChunker): THIS
}

Methods:

  • withChunkerConfig(): Set chunker configuration
  • withContentChunker(): Set content chunker

Usage Examples

Basic Vector Search

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.Chunk

val searchOps: VectorSearch = // implementation

// Describe the query first, then hand it to the search call
val searchRequest = TextSimilaritySearchRequest(
    query = "machine learning algorithms",
    topK = 10,
    similarityThreshold = 0.7
)
val matches = searchOps.vectorSearch(request = searchRequest, clazz = Chunk::class.java)

// Print score and text for every match, followed by a separator line
for (match in matches) {
    println("Score: ${match.score}")
    println("Content: ${match.content.text}")
    println("---")
}

Filtered Vector Search

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.filter.*
import com.embabel.agent.rag.model.*

val searchOps: FilteringVectorSearch = // implementation

// Metadata filter, built up one clause at a time:
// category == "security" AND version >= 2.0 AND tags contains "authentication"
val securityAtLeastV2 = PropertyFilter.eq("category", "security")
    .and(PropertyFilter.gte("version", 2.0))
val metadataFilter = securityAtLeastV2
    .and(PropertyFilter.contains("tags", "authentication"))

// Entity filter: accept anything labelled "Chunk" or "Fact"
val entityFilter = EntityFilter.hasAnyLabel("Chunk", "Fact")

// Vector search restricted by both filters
val searchRequest = TextSimilaritySearchRequest(
    query = "authentication setup",
    topK = 10,
    similarityThreshold = 0.75
)
val matches = searchOps.vectorSearchWithFilter(
    request = searchRequest,
    clazz = Chunk::class.java,
    metadataFilter = metadataFilter,
    entityFilter = entityFilter
)

println("Found ${matches.size} results")

Full-Text Search

import com.embabel.agent.rag.service.*
// Chunk lives in the model package and is needed for Chunk::class.java below
import com.embabel.agent.rag.model.Chunk

val searchOps: TextSearch = // implementation

// Print the implementation's notes on which Lucene syntax it supports
println("Syntax notes: ${searchOps.luceneSyntaxNotes}")

// Perform text search with Lucene syntax (boolean operators and grouping)
val results = searchOps.textSearch(
    request = TextSimilaritySearchRequest(
        query = "authentication AND (oauth OR jwt) NOT deprecated",
        topK = 20
    ),
    clazz = Chunk::class.java
)

// Show each score with the first 100 characters of the chunk text
results.forEach { result ->
    println("${result.score}: ${result.content.text.take(100)}...")
}

Filtered Text Search

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.filter.*
// Chunk lives in the model package and is needed for Chunk::class.java below
import com.embabel.agent.rag.model.Chunk

val searchOps: FilteringTextSearch = // implementation

// Text search with metadata filter:
// category == "tutorial" AND status != "deprecated"; no entity-label filtering
val results = searchOps.textSearchWithFilter(
    request = TextSimilaritySearchRequest(
        query = "installation OR setup",
        topK = 15
    ),
    clazz = Chunk::class.java,
    metadataFilter = PropertyFilter.eq("category", "tutorial")
        .and(PropertyFilter.ne("status", "deprecated")),
    entityFilter = null
)

Regex Search

import com.embabel.agent.rag.service.*
// Chunk lives in the model package and is needed for Chunk::class.java below
import com.embabel.agent.rag.model.Chunk

val searchOps: RegexSearchOperations = // implementation

// Search for email addresses (at most 50 results)
val emailPattern = Regex("[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}")
val results = searchOps.regexSearch(
    regex = emailPattern,
    topK = 50,
    clazz = Chunk::class.java
)

// Search for version numbers such as "1.2.3" or "v1.2.3" (at most 100 results)
val versionPattern = Regex("v?\\d+\\.\\d+\\.\\d+")
val versionResults = searchOps.regexSearch(
    regex = versionPattern,
    topK = 100,
    clazz = Chunk::class.java
)

Filtered Regex Search

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.filter.*
// Chunk lives in the model package and is needed for Chunk::class.java below
import com.embabel.agent.rag.model.Chunk

val searchOps: FilteringRegexSearch = // implementation

// Search for URLs, restricted by metadata:
// section == "references" AND status == "published"; no entity-label filtering
val urlPattern = Regex("https?://[^\\s]+")
val results = searchOps.regexSearchWithFilter(
    regex = urlPattern,
    topK = 30,
    clazz = Chunk::class.java,
    metadataFilter = PropertyFilter.eq("section", "references")
        .and(PropertyFilter.eq("status", "published")),
    entityFilter = null
)

Result Expansion

import com.embabel.agent.rag.service.*
// Chunk lives in the model package and is needed for Chunk::class.java below
import com.embabel.agent.rag.model.Chunk

val searchOps: ResultExpander = // implementation
val vectorSearchOps: VectorSearch = // implementation

// Find a chunk, then expand for more context
val initialResults = vectorSearchOps.vectorSearch(
    request = TextSimilaritySearchRequest("error handling", topK = 1),
    clazz = Chunk::class.java
)

// The search may match nothing; firstOrNull() avoids the
// NoSuchElementException that first() throws on an empty list
val firstResult = initialResults.firstOrNull()

if (firstResult == null) {
    println("No results to expand")
} else {
    // Expand along the document sequence (previous/next chunks), adding 2 elements
    val sequenceContext = searchOps.expandResult(
        id = firstResult.content.id,
        method = ResultExpander.Method.SEQUENCE,
        elementsToAdd = 2
    )

    println("Sequence context: ${sequenceContext.size} elements")

    // Zoom out to the enclosing (parent) section
    val parentContext = searchOps.expandResult(
        id = firstResult.content.id,
        method = ResultExpander.Method.ZOOM_OUT,
        elementsToAdd = 1
    )

    println("Parent context: ${parentContext.size} elements")
}

Finding by ID

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.*

val searchOps: FinderOperations = // implementation

// Find by ID and class: T is inferred from the Class argument
val chunk = searchOps.findById("chunk-123", Chunk::class.java)
if (chunk != null) {
    println("Found chunk: ${chunk.text.take(50)}...")
}

// Find by ID and type string: T appears only in the return type,
// so it has to be supplied explicitly
val retrievable = searchOps.findById<Retrievable>("chunk-123", "Chunk")

// Check if type is supported
if (searchOps.supportsType("Chunk")) {
    println("Chunk type is supported")
}

if (searchOps.supportsType("CustomEntity")) {
    // Explicit type argument required here as well; without it the compiler
    // has nothing to infer T from and the call does not compile
    val entity = searchOps.findById<Retrievable>("entity-456", "CustomEntity")
}

Cluster Finding

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.*

val clusterFinder: ClusterFinder = // implementation

// Start from the default request and override threshold and topK
val clusterRequest = ClusterRetrievalRequest<NamedEntityData>()
    .withSimilarityThreshold(0.8)
    .withTopK(15)
val clusters = clusterFinder.findClusters(clusterRequest)

// Print each anchor followed by its similar items and a blank separator line
for (cluster in clusters) {
    println("Cluster anchor: ${cluster.anchor.name}")
    println("Similar items (${cluster.similar.size}):")
    for (neighbor in cluster.similar) {
        println("  - ${neighbor.score}: ${neighbor.content.name}")
    }
    println()
}

Formatting Results

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.*

val searchOps: VectorSearch = // implementation

// Run a search and wrap the raw list in a SimilarityResults container
val hits = searchOps.vectorSearch(
    request = TextSimilaritySearchRequest("kotlin tutorial", topK = 5),
    clazz = Chunk::class.java
)
val similarityResults = SimilarityResults.fromList(hits)

// Default formatter: the singleton object is used directly
println(SimpleRetrievableResultsFormatter.formatResults(similarityResults))

// Custom formatter, supplied as a lambda: a header line, then one numbered
// entry per result (score to three decimals, info string, blank line)
val customFormatter = RetrievableResultsFormatter { wrapped ->
    val entries = wrapped.results
    val report = StringBuilder()
    report.appendLine("=== Search Results (${entries.size}) ===")
    for ((index, entry) in entries.withIndex()) {
        report.appendLine("${index + 1}. Score: ${"%.3f".format(entry.score)}")
        report.appendLine("   ${entry.content.infoString()}")
        report.appendLine()
    }
    report.toString()
}

println(customFormatter.formatResults(similarityResults))

Using Retrievable Identifiers

import com.embabel.agent.rag.service.RetrievableIdentifier
import com.embabel.agent.rag.model.*

// Identifiers from the dedicated factory methods, printed as they are created
val chunkId = RetrievableIdentifier.forChunk("chunk-123")
    .also { println("Chunk ID: ${it.id}, Type: ${it.type}") }

val userId = RetrievableIdentifier.forUser("user-456")
    .also { println("User ID: ${it.id}, Type: ${it.type}") }

// Identifier derived from an existing retrievable object
val chunk: Chunk = // ...
val identifier = RetrievableIdentifier.from(chunk)
    .also { println("Identifier: ${it.id} (${it.type})") }

// Identifier with an explicit ID and a custom type label
val customId = RetrievableIdentifier(id = "entity-789", type = "CustomEntity")

Combined Search Workflow

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.filter.*
import com.embabel.agent.rag.model.*

/**
 * Runs a vector search, expands the top hit for context, runs a text search,
 * and prints the de-duplicated, formatted results.
 *
 * Kotlin cannot declare a variable of an intersection type such as
 * `FilteringVectorSearch & FilteringTextSearch & ResultExpander`, so the
 * requirement "one object implementing all three" is expressed as a generic
 * type parameter with multiple upper bounds.
 *
 * @param searchOps Implementation providing filtered vector search,
 *   filtered text search, and result expansion
 */
fun <S> runCombinedSearch(searchOps: S)
    where S : FilteringVectorSearch,
          S : FilteringTextSearch,
          S : ResultExpander {
    // 1. Initial vector search, restricted to category == "performance"
    val vectorResults = searchOps.vectorSearchWithFilter(
        request = TextSimilaritySearchRequest("database optimization", topK = 10),
        clazz = Chunk::class.java,
        metadataFilter = PropertyFilter.eq("category", "performance"),
        entityFilter = null
    )

    println("Vector search: ${vectorResults.size} results")

    // 2. Expand top result for context (skipped when the search matched nothing)
    vectorResults.firstOrNull()?.let { topResult ->
        val context = searchOps.expandResult(
            id = topResult.content.id,
            method = ResultExpander.Method.SEQUENCE,
            elementsToAdd = 1
        )
        println("Context: ${context.size} elements")
    }

    // 3. Text search for specific terms, restricted to version >= 2.0
    val textResults = searchOps.textSearchWithFilter(
        request = TextSimilaritySearchRequest(
            query = "indexing AND (performance OR optimization)",
            topK = 10
        ),
        clazz = Chunk::class.java,
        metadataFilter = PropertyFilter.gte("version", 2.0),
        entityFilter = null
    )

    println("Text search: ${textResults.size} results")

    // 4. Format combined results; distinctBy keeps the first occurrence of each
    //    ID, so a chunk found by both searches keeps its vector-search entry
    val allResults = (vectorResults + textResults).distinctBy { it.content.id }
    val formatted = SimpleRetrievableResultsFormatter.formatResults(
        SimilarityResults.fromList(allResults)
    )
    println(formatted)
}

Type-Safe Search

import com.embabel.agent.rag.service.*
import com.embabel.agent.rag.model.*

/**
 * Type-safe wrapper around [VectorSearch.vectorSearch]: the result class is
 * taken from the reified type argument, so callers do not pass a `Class` token.
 *
 * @param searchOps Vector search implementation to query
 * @param query Query text
 * @param topK Maximum number of results to request (defaults to 10)
 * @return Similarity results typed as [T]
 */
inline fun <reified T : Retrievable> search(
    searchOps: VectorSearch,
    query: String,
    topK: Int = 10
): List<SimilarityResult<T>> =
    searchOps.vectorSearch(
        // Named arguments: the request's parameter order is not documented here,
        // so a positional topK could bind to a different parameter
        request = TextSimilaritySearchRequest(query = query, topK = topK),
        clazz = T::class.java
    )

val searchOps: VectorSearch = // implementation

// The declared result type drives the reified type argument of search()
val chunks: List<SimilarityResult<Chunk>> = search(searchOps, "kotlin tutorial")
val facts: List<SimilarityResult<Fact>> = search(searchOps, "kotlin features")

// Each hit's content is statically a Chunk, so no cast is needed
for (hit in chunks) {
    val chunk: Chunk = hit.content
    println("Chunk: ${chunk.text}")
}
tessl i tessl/maven-com-embabel-agent--embabel-agent-rag-core@0.3.1

docs

index.md

README.md

tile.json