RAG (Retrieval-Augmented Generation) framework for the Embabel Agent platform providing content ingestion, chunking, hierarchical navigation, and semantic search capabilities
Comprehensive utility classes and helper implementations for RAG operations, including in-memory repositories for testing, asset-based search, directory text search, and mathematical utilities for text and vector operations.
The support utilities package provides essential helper implementations for in-memory entity storage, asset-based search, directory text search, and text and vector math.
These utilities are particularly useful for testing RAG systems without requiring full database infrastructure, performing searches across different content types, and implementing custom similarity algorithms.
Lightweight in-memory implementation of NamedEntityDataRepository that provides full search and relationship support without requiring external storage systems. Ideal for testing, prototyping, and small-scale applications.
class InMemoryNamedEntityDataRepository(
/**
* Data dictionary for type resolution
*/
val dataDictionary: DataDictionary,
/**
* Optional embedding service for vector search
* If provided, enables vector similarity search operations
*/
val embeddingService: EmbeddingService? = null,
/**
* ObjectMapper for entity serialization
* Used for converting entities to/from JSON
*/
override val objectMapper: ObjectMapper = ObjectMapper()
) : NamedEntityDataRepository {
/**
* Current number of entities in repository
* @return Total count of stored entities
*/
val size: Int
/**
* Clear all entities from the repository
* Removes all stored entities and resets the repository to empty state
*/
fun clear()
// Implements all NamedEntityDataRepository methods including:
// - save(), findById(), delete()
// - vectorSearch(), textSearch(), regexSearch()
// - relationship queries and metadata filtering
}

Basic Repository Operations
import com.embabel.agent.rag.service.support.*
import com.embabel.agent.rag.model.*
// Create in-memory repository
val repository = InMemoryNamedEntityDataRepository(
dataDictionary = myDataDictionary,
embeddingService = myEmbeddingService
)
// Add entities
val person = SimpleNamedEntityData(
id = "person-1",
name = "Alice Smith",
description = "Software engineer specializing in distributed systems",
properties = mapOf(
"team" to "platform",
"role" to "engineer",
"experience" to "5 years"
)
)
repository.save(person)
// Search entities
val results = repository.vectorSearch(
request = TextSimilaritySearchRequest("platform engineer", topK = 10),
metadataFilter = null,
entityFilter = null
)
// Check repository size
println("Repository contains ${repository.size} entities")
// Clear repository when done
repository.clear()

Testing with In-Memory Repository
import com.embabel.agent.rag.service.support.*
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.AfterEach
class EntitySearchTest {
private lateinit var testRepo: InMemoryNamedEntityDataRepository
@BeforeEach
fun setUp() {
// Create temporary repository for testing
testRepo = InMemoryNamedEntityDataRepository(
dataDictionary = DataDictionary.empty(),
embeddingService = null // No embeddings needed for basic tests
)
}
@AfterEach
fun tearDown() {
// Clean up after each test
testRepo.clear()
}
@Test
fun testBulkEntityStorage() {
// Add test data
val entities = (1..100).map { i ->
SimpleNamedEntityData(
id = "test-$i",
name = "Entity $i",
description = "Test entity number $i for validation",
properties = mapOf(
"index" to i,
"category" to if (i % 2 == 0) "even" else "odd"
)
)
}
entities.forEach { testRepo.save(it) }
// Perform test assertions
assertEquals(100, testRepo.size)
val entity50 = testRepo.findById("test-50")
assertNotNull(entity50)
assertEquals("Entity 50", entity50?.name)
}
@Test
fun testEntityDeletion() {
val entity = SimpleNamedEntityData(
id = "temp-1",
name = "Temporary",
description = "Temporary entity"
)
testRepo.save(entity)
assertEquals(1, testRepo.size)
testRepo.delete("temp-1")
assertEquals(0, testRepo.size)
}
}

Combined Testing Workflow
import com.embabel.agent.rag.service.support.*
import com.embabel.agent.rag.model.*
// Create a complete in-memory RAG system for testing
val repository = InMemoryNamedEntityDataRepository(
dataDictionary = DataDictionary.empty(),
embeddingService = myEmbeddingService
)
// Populate with test data
val documents = listOf(
"Machine learning algorithms for classification",
"Neural networks and deep learning techniques",
"Data preprocessing and feature engineering"
)
documents.forEachIndexed { index, text ->
val entity = SimpleNamedEntityData(
id = "doc-$index",
name = "Document $index",
description = text,
properties = mapOf("category" to "ml")
)
repository.save(entity)
}
// Perform vector search
val query = "machine learning classification"
val vectorResults = repository.vectorSearch(
request = TextSimilaritySearchRequest(query, topK = 10),
metadataFilter = null,
entityFilter = null
)
// Process results
vectorResults.forEach { result ->
println("Found: ${result.content.name} (score: ${result.score})")
}
// Clean up
println("Repository size: ${repository.size}")
repository.clear()

Search operations specialized for working with AssetView content structures. Provides comprehensive search capabilities including vector search, text search, and regex search across asset-based content.
class AssetViewSearchOperations(
/**
* Asset view to search
* Contains the asset structure and content to be searched
*/
val assetView: AssetView,
/**
* Optional embedding service for vector search
* Required for vector similarity search operations
*/
val embeddingService: EmbeddingService? = null
) : CoreSearchOperations, FilteringVectorSearch, FilteringTextSearch,
RegexSearchOperations, FilteringRegexSearch, TypeRetrievalOperations {
/**
* Clear embedding cache
* Removes all cached embeddings to free memory
*/
fun clearEmbeddingCache()
/**
* Precompute embeddings for all content
* Improves search performance by calculating embeddings upfront
*/
fun precomputeEmbeddings()
// Implements all search operation interfaces:
// - vectorSearch() for semantic similarity
// - textSearch() for keyword matching
// - regexSearch() for pattern matching
// - Filtering variants of each search type
}

Retrievable wrapper for Asset objects that enables assets to be used with search operations.
class AssetRetrievable(
override val id: String,
override val uri: String?,
override val metadata: Map<String, Any?>,
private val asset: Asset
) : Retrievable {
/**
* Get the underlying Asset
* @return Asset instance wrapped by this retrievable
*/
fun getAsset(): Asset
/**
* Get embeddable text representation
* @return String representation suitable for embedding generation
*/
override fun embeddableValue(): String
/**
* Get human-readable information string
* @param verbose Include detailed information
* @param indent Indentation level for formatting
* @return Formatted information string
*/
override fun infoString(verbose: Boolean?, indent: Int): String
}

Basic Asset View Search
import com.embabel.agent.rag.service.support.*
// Create asset view search operations
val assetView: AssetView = // ... your asset view
val searchOps = AssetViewSearchOperations(
assetView = assetView,
embeddingService = myEmbeddingService
)
// Precompute embeddings for better performance
searchOps.precomputeEmbeddings()
// Perform vector search
val results = searchOps.vectorSearch(
request = TextSimilaritySearchRequest(
query = "configuration options",
topK = 10
),
clazz = AssetRetrievable::class.java
)
// Process results
results.forEach { result ->
val asset = (result.content as AssetRetrievable).getAsset()
println("Asset: ${asset.name}")
println("Score: ${result.score}")
println("URI: ${result.content.uri}")
}
// Clear cache when needed
searchOps.clearEmbeddingCache()

Asset Search with Filtering
import com.embabel.agent.rag.service.support.*
val searchOps = AssetViewSearchOperations(
assetView = myAssetView,
embeddingService = myEmbeddingService
)
// Search with metadata filtering
val metadataFilter = mapOf(
"type" to "documentation",
"version" to "2.0"
)
val filteredResults = searchOps.vectorSearch(
request = TextSimilaritySearchRequest(
query = "API endpoints",
topK = 20
),
clazz = AssetRetrievable::class.java,
metadataFilter = metadataFilter
)
filteredResults.forEach { result ->
val asset = (result.content as AssetRetrievable).getAsset()
println("${asset.name}: ${asset.description}")
}

Working with Asset Retrievables
import com.embabel.agent.rag.service.support.*
// Get asset from search result
val result = searchResults.first()
val retrievable = result.content as AssetRetrievable
// Access asset details
val asset = retrievable.getAsset()
println("Asset ID: ${asset.id}")
println("Asset name: ${asset.name}")
// Get embeddable representation
val embeddableText = retrievable.embeddableValue()
println("Embeddable text: $embeddableText")
// Get formatted info
val info = retrievable.infoString(verbose = true, indent = 2)
println(info)
// Access metadata
retrievable.metadata.forEach { (key, value) ->
println("$key: $value")
}

File-based full-text search implementation for directory structures. Supports Lucene query syntax, regex patterns, and configurable file filtering.
class DirectoryTextSearch(
/**
* Directory path to search
* Root directory for recursive file search
*/
val directory: String,
/**
* Search configuration
* Controls which files to include/exclude and search limits
*/
val config: Config
) : TextSearch, RegexSearchOperations, TypeRetrievalOperations {
/**
* Configuration for directory text search
*/
data class Config(
/**
* File extensions to include in search
* Only files with these extensions will be processed
*/
val includedExtensions: Set<String> = setOf("md", "txt"),
/**
* Directories to exclude from search
* Common directories like .git, node_modules are excluded by default
*/
val excludedDirectories: Set<String> = setOf(".git", "node_modules"),
/**
* Maximum file size to process
* Files larger than this will be skipped (default: 10 MB)
*/
val maxFileSize: Long = 10_485_760, // 10 MB
/**
* Maximum search results to return
* Limits the total number of results returned
*/
val maxResults: Int = 100
) {
/**
* Create new config with different included extensions
*/
fun withIncludedExtensions(extensions: Set<String>): Config
/**
* Create new config with different excluded directories
*/
fun withExcludedDirectories(directories: Set<String>): Config
/**
* Create new config with different max file size
*/
fun withMaxFileSize(size: Long): Config
/**
* Create new config with different max results
*/
fun withMaxResults(max: Int): Config
}
/**
* Description of supported Lucene syntax
* Documents the query syntax available for text search
*/
override val luceneSyntaxNotes: String
// Implements TextSearch and RegexSearchOperations:
// - textSearch() with Lucene query support
// - regexSearch() for pattern matching
}

Basic Directory Search
import com.embabel.agent.rag.service.support.*
import com.embabel.agent.rag.model.Chunk
// Create directory search
val directorySearch = DirectoryTextSearch(
directory = "/path/to/docs",
config = DirectoryTextSearch.Config(
includedExtensions = setOf("md", "txt", "adoc"),
excludedDirectories = setOf(".git", "node_modules", "target"),
maxFileSize = 5_242_880, // 5 MB
maxResults = 50
)
)
// Perform text search with Lucene syntax
val results = directorySearch.textSearch(
request = TextSimilaritySearchRequest(
query = "authentication AND (oauth OR jwt)",
topK = 20
),
clazz = Chunk::class.java
)
// Check Lucene syntax support
println("Lucene syntax: ${directorySearch.luceneSyntaxNotes}")
// Process results
results.forEach { result ->
println("File: ${result.content.uri}")
println("Text: ${result.content.text}")
println("Score: ${result.score}")
println("---")
}

Directory Search with Regex
import com.embabel.agent.rag.service.support.*
val directorySearch = DirectoryTextSearch(
directory = "/path/to/codebase",
config = DirectoryTextSearch.Config(
includedExtensions = setOf("kt", "java"),
maxResults = 100
)
)
// Search for email addresses in code
val emailPattern = Regex("[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}")
val emailResults = directorySearch.regexSearch(
regex = emailPattern,
topK = 50,
clazz = Chunk::class.java
)
emailResults.forEach { result ->
println("Found in: ${result.content.uri}")
println("Match: ${result.content.text}")
}
// Search for TODO comments
val todoPattern = Regex("TODO:.*")
val todoResults = directorySearch.regexSearch(
regex = todoPattern,
topK = 100,
clazz = Chunk::class.java
)
todoResults.forEach { result ->
println("TODO in: ${result.content.uri}")
println("${result.content.text}")
}

Custom Configuration
import com.embabel.agent.rag.service.support.*
// Start with base config
val baseConfig = DirectoryTextSearch.Config()
// Customize with builder methods
val customConfig = baseConfig
.withIncludedExtensions(setOf("md", "mdx", "rst"))
.withExcludedDirectories(setOf(".git", "build", "dist", "target"))
.withMaxFileSize(20_971_520) // 20 MB
.withMaxResults(200)
val search = DirectoryTextSearch(
directory = "/docs",
config = customConfig
)
// Search with custom configuration
val results = search.textSearch(
request = TextSimilaritySearchRequest("API documentation", topK = 50),
clazz = Chunk::class.java
)

Advanced Query Patterns
import com.embabel.agent.rag.service.support.*
val search = DirectoryTextSearch(
directory = "/project/docs",
config = DirectoryTextSearch.Config(
includedExtensions = setOf("md", "txt"),
maxResults = 100
)
)
// Boolean queries
val booleanQuery = "REST AND (GET OR POST) NOT deprecated"
val booleanResults = search.textSearch(
request = TextSimilaritySearchRequest(booleanQuery, topK = 20),
clazz = Chunk::class.java
)
// Phrase queries
val phraseQuery = "\"database connection pool\""
val phraseResults = search.textSearch(
request = TextSimilaritySearchRequest(phraseQuery, topK = 15),
clazz = Chunk::class.java
)
// Wildcard queries
val wildcardQuery = "config*"
val wildcardResults = search.textSearch(
request = TextSimilaritySearchRequest(wildcardQuery, topK = 25),
clazz = Chunk::class.java
)

Utility object providing text-based similarity calculations for matching query terms against text content.
object TextMath {
/**
* Calculate text match score based on query term presence
* Computes a score between 0.0 and 1.0 based on how many query terms
* are present in the text. Handles term frequency and document length.
*
* @param text Text to score
* @param queryTerms List of query terms to match
* @return Match score between 0.0 and 1.0, higher is better
*/
fun textMatchScore(text: String, queryTerms: List<String>): Double
}

Utility object providing vector similarity calculations and conversion operations for working with embeddings.
object VectorMath {
/**
* Calculate cosine similarity between two vectors
* Computes the cosine of the angle between two vectors,
* commonly used for measuring semantic similarity between embeddings.
*
* @param a First vector
* @param b Second vector
* @return Cosine similarity score between -1.0 and 1.0
* 1.0 = same direction, 0.0 = orthogonal, -1.0 = opposite direction
*/
fun cosineSimilarity(a: FloatArray, b: FloatArray): Double
/**
* Convert float array to byte array for storage
* Enables efficient storage of embeddings in databases or files
*
* @param floatArray Float array to convert
* @return Byte array representation (4 bytes per float)
*/
fun floatArrayToBytes(floatArray: FloatArray): ByteArray
/**
* Convert byte array back to float array
* Reconstructs float array from byte representation
*
* @param bytes Byte array to convert
* @return Float array representation
*/
fun bytesToFloatArray(bytes: ByteArray): FloatArray
}

Text Similarity Scoring
import com.embabel.agent.rag.service.support.TextMath
// Calculate text match score
val text = "This is a document about machine learning and artificial intelligence"
val queryTerms = listOf("machine", "learning", "AI", "neural")
val score = TextMath.textMatchScore(text, queryTerms)
println("Match score: $score") // Higher score = more query terms present
// Use for custom ranking.
// The ranking terms get their own name: `queryTerms` is already declared earlier
// in this snippet, and redeclaring it in the same scope does not compile.
val documents = listOf(
    "Document about machine learning algorithms",
    "Document about web development frameworks",
    "Document about AI and neural networks"
)
val query = "machine learning neural networks"
val rankingTerms = query.split(" ")
// Score each document once, then order by that score (highest first)
val rankedDocuments = documents
    .map { doc -> doc to TextMath.textMatchScore(doc, rankingTerms) }
    .sortedByDescending { (_, score) -> score }
    .map { (doc, _) -> doc }
println("Ranked documents:")
rankedDocuments.forEachIndexed { index, doc ->
println("${index + 1}. $doc")
}

Vector Similarity Calculations
import com.embabel.agent.rag.service.support.VectorMath
// Calculate cosine similarity between embeddings
val embedding1 = FloatArray(384) { kotlin.random.Random.nextFloat() }
val embedding2 = FloatArray(384) { kotlin.random.Random.nextFloat() }
val similarity = VectorMath.cosineSimilarity(embedding1, embedding2)
println("Cosine similarity: $similarity")
// Compare multiple embeddings
val embeddings = listOf(
"doc1" to embedding1,
"doc2" to embedding2,
"doc3" to FloatArray(384) { kotlin.random.Random.nextFloat() }
)
val query = FloatArray(384) { kotlin.random.Random.nextFloat() }
val similarities = embeddings.map { (id, emb) ->
id to VectorMath.cosineSimilarity(query, emb)
}
similarities.sortedByDescending { it.second }.forEach { (id, sim) ->
println("$id: $sim")
}

Embedding Storage and Retrieval
import com.embabel.agent.rag.service.support.VectorMath
import java.io.*
// Store embeddings as bytes
val embedding = FloatArray(384) { 0.5f }
val bytes = VectorMath.floatArrayToBytes(embedding)
println("Byte size: ${bytes.size}") // 384 * 4 = 1536 bytes
// Write to file
File("/tmp/embedding.bin").writeBytes(bytes)
// Read from file
val loadedBytes = File("/tmp/embedding.bin").readBytes()
val retrievedEmbedding = VectorMath.bytesToFloatArray(loadedBytes)
// Verify integrity
assert(embedding.contentEquals(retrievedEmbedding))
println("Embedding successfully stored and retrieved")

Custom Embedding Store
import com.embabel.agent.rag.service.support.VectorMath
import java.io.*
/**
 * Minimal file-backed embedding store. Each embedding is written to
 * `<directory>/<id>.emb` as the byte form produced by [VectorMath.floatArrayToBytes].
 *
 * @param directory Folder that holds the `.emb` files; created on construction if missing
 */
class EmbeddingStore(private val directory: File) {
    init {
        directory.mkdirs()
    }

    /**
     * Serializes [embedding] and writes it to the file for [id].
     *
     * @param id Identifier used as the file name (without the `.emb` extension)
     * @param embedding Vector to persist
     */
    fun saveEmbedding(id: String, embedding: FloatArray) {
        val bytes = VectorMath.floatArrayToBytes(embedding)
        File(directory, "$id.emb").writeBytes(bytes)
    }

    /**
     * Loads the embedding stored for [id].
     *
     * @param id Identifier the embedding was saved under
     * @return The stored vector, or `null` when no file exists for [id]
     */
    fun loadEmbedding(id: String): FloatArray? {
        val file = File(directory, "$id.emb")
        if (!file.exists()) return null
        return VectorMath.bytesToFloatArray(file.readBytes())
    }

    /**
     * Ranks every stored embedding by cosine similarity to [queryEmbedding].
     *
     * @param queryEmbedding Vector to compare against
     * @param topK Maximum number of results to return
     * @return Up to [topK] `(id, similarity)` pairs, highest similarity first;
     *         empty when the directory cannot be listed
     */
    fun findSimilar(
        queryEmbedding: FloatArray,
        topK: Int = 10
    ): List<Pair<String, Double>> {
        val embeddingFiles = directory.listFiles { file -> file.extension == "emb" }
            ?: return emptyList()
        return embeddingFiles
            .mapNotNull { file ->
                val id = file.nameWithoutExtension
                // A file can disappear between listing and loading; skip it instead of
                // failing the whole search with a NullPointerException.
                loadEmbedding(id)?.let { stored ->
                    id to VectorMath.cosineSimilarity(queryEmbedding, stored)
                }
            }
            .sortedByDescending { it.second }
            .take(topK)
    }
}
// Usage
val store = EmbeddingStore(File("/tmp/embeddings"))
// Store embeddings
val docs = listOf("doc1", "doc2", "doc3")
docs.forEach { id ->
val embedding = FloatArray(384) { kotlin.random.Random.nextFloat() }
store.saveEmbedding(id, embedding)
}
// Find similar
val query = FloatArray(384) { kotlin.random.Random.nextFloat() }
val similar = store.findSimilar(query, topK = 5)
similar.forEach { (id, score) ->
println("$id: $score")
}

Custom Similarity Search
import com.embabel.agent.rag.service.support.VectorMath
/**
 * Scores every candidate against [queryEmbedding] with [VectorMath.cosineSimilarity]
 * and keeps those that reach [threshold].
 *
 * @param queryEmbedding Vector to compare against
 * @param candidateEmbeddings `(id, embedding)` pairs to score
 * @param threshold Minimum similarity (inclusive) a candidate needs to be kept
 * @return `(id, similarity)` pairs ordered from highest to lowest similarity
 */
fun findMostSimilar(
    queryEmbedding: FloatArray,
    candidateEmbeddings: List<Pair<String, FloatArray>>,
    threshold: Double = 0.0
): List<Pair<String, Double>> {
    val kept = mutableListOf<Pair<String, Double>>()
    for ((candidateId, candidateEmbedding) in candidateEmbeddings) {
        val score = VectorMath.cosineSimilarity(queryEmbedding, candidateEmbedding)
        if (score >= threshold) {
            kept += candidateId to score
        }
    }
    return kept.sortedByDescending { (_, score) -> score }
}
// Use in custom search.
// Random values stand in for real embeddings: the FloatArray initializer lambda
// must return a Float for every index, so a comment-only lambda does not compile.
val query = FloatArray(384) { kotlin.random.Random.nextFloat() }
val candidates = listOf(
    "doc1" to FloatArray(384) { kotlin.random.Random.nextFloat() },
    "doc2" to FloatArray(384) { kotlin.random.Random.nextFloat() },
    "doc3" to FloatArray(384) { kotlin.random.Random.nextFloat() }
)
val ranked = findMostSimilar(
queryEmbedding = query,
candidateEmbeddings = candidates,
threshold = 0.5 // Only return results with similarity >= 0.5
)
ranked.take(5).forEach { (id, score) ->
println("$id: $score")
}

Combining text and vector scores for better search results.
import com.embabel.agent.rag.service.support.*
import com.embabel.agent.rag.model.*
// Create repository with embedding support
val repository = InMemoryNamedEntityDataRepository(
dataDictionary = DataDictionary.empty(),
embeddingService = myEmbeddingService
)
// Populate with documents
val documents = listOf(
"Machine learning algorithms for classification tasks",
"Neural networks and deep learning architectures",
"Data preprocessing and feature engineering techniques"
)
documents.forEachIndexed { index, text ->
val entity = SimpleNamedEntityData(
id = "doc-$index",
name = "Document $index",
description = text,
properties = mapOf("category" to "ml")
)
repository.save(entity)
}
// Hybrid search: combine vector and text scores
val query = "machine learning classification"
val queryTerms = query.split(" ")
// Get vector search results
val vectorResults = repository.vectorSearch(
request = TextSimilaritySearchRequest(query, topK = 10),
metadataFilter = null,
entityFilter = null
)
// Calculate combined scores
val combinedResults = vectorResults.map { result ->
val textScore = TextMath.textMatchScore(
result.content.description,
queryTerms
)
// Weighted combination: 70% vector, 30% text
val combinedScore = result.score * 0.7 + textScore * 0.3
result to combinedScore
}.sortedByDescending { it.second }
// Display results
combinedResults.forEach { (result, score) ->
println("${result.content.name}: $score")
println(" Vector: ${result.score}, Text: ${TextMath.textMatchScore(result.content.description, queryTerms)}")
}
println("\nRepository size: ${repository.size}")
repository.clear()

Searching across different content sources simultaneously.
import com.embabel.agent.rag.service.support.*
// Set up multiple search sources
val directorySearch = DirectoryTextSearch(
directory = "/docs",
config = DirectoryTextSearch.Config(
includedExtensions = setOf("md", "txt"),
maxResults = 50
)
)
val assetSearch = AssetViewSearchOperations(
assetView = myAssetView,
embeddingService = myEmbeddingService
)
val repository = InMemoryNamedEntityDataRepository(
dataDictionary = myDataDictionary,
embeddingService = myEmbeddingService
)
// Search across all sources
val query = "API authentication"
val request = TextSimilaritySearchRequest(query, topK = 20)
val directoryResults = directorySearch.textSearch(request, Chunk::class.java)
val assetResults = assetSearch.vectorSearch(request, AssetRetrievable::class.java)
val repoResults = repository.vectorSearch(request, null, null)
// Combine and deduplicate results.
// The parentheses are required: infix `to` binds tighter than `?:`, so
// `uri ?: id to score` parses as `uri ?: (id to score)` and yields a bare String
// (not a Pair) whenever `uri` is non-null.
val allResults = mutableListOf<Pair<String, Double>>()
allResults.addAll(directoryResults.map { (it.content.uri ?: it.content.id) to it.score })
allResults.addAll(assetResults.map { (it.content.uri ?: it.content.id) to it.score })
allResults.addAll(repoResults.map { it.content.id to it.score })
// Keep the best score seen for each key, highest score first
val uniqueResults = allResults
    .groupBy { it.first }
    .mapValues { (_, scores) -> scores.maxOf { it.second } }
    .toList()
    .sortedByDescending { it.second }
uniqueResults.take(10).forEach { (id, score) ->
println("$id: $score")
}

Using utilities in comprehensive test suites.
import com.embabel.agent.rag.service.support.*
import org.junit.jupiter.api.*
class RAGSystemTest {
private lateinit var repository: InMemoryNamedEntityDataRepository
private lateinit var directorySearch: DirectoryTextSearch
@BeforeEach
fun setUp() {
repository = InMemoryNamedEntityDataRepository(
dataDictionary = DataDictionary.empty(),
embeddingService = mockEmbeddingService
)
directorySearch = DirectoryTextSearch(
directory = testDataDirectory,
config = DirectoryTextSearch.Config(
includedExtensions = setOf("txt"),
maxResults = 100
)
)
}
@AfterEach
fun tearDown() {
repository.clear()
}
@Test
fun testVectorSearchAccuracy() {
    // Populate test data
    val testDocs = loadTestDocuments()
    testDocs.forEach { repository.save(it) }
    // Perform search
    val results = repository.vectorSearch(
        request = TextSimilaritySearchRequest("test query", topK = 10),
        metadataFilter = null,
        entityFilter = null
    )
    // Validate results. The wildcard import `org.junit.jupiter.api.*` brings the
    // Assertions class into scope but not its static methods, so calls are qualified.
    Assertions.assertTrue(results.isNotEmpty())
    Assertions.assertTrue(results.first().score > 0.5)
}
@Test
fun testHybridScoring() {
    val query = "machine learning"
    val queryTerms = query.split(" ")
    val vectorScore = 0.8
    val textScore = TextMath.textMatchScore(
        "machine learning algorithms",
        queryTerms
    )
    // Weighted combination: 70% vector, 30% text
    val combinedScore = vectorScore * 0.7 + textScore * 0.3
    // Qualified call: the wildcard import `org.junit.jupiter.api.*` exposes the
    // Assertions class, not its static assertTrue method.
    Assertions.assertTrue(combinedScore > 0.7)
}
}

import com.embabel.agent.rag.service.support.*
val assetSearch = AssetViewSearchOperations(
assetView = largeAssetView,
embeddingService = myEmbeddingService
)
// Precompute embeddings once for multiple searches
assetSearch.precomputeEmbeddings()
// Perform multiple searches efficiently
val queries = listOf("query1", "query2", "query3")
queries.forEach { query ->
val results = assetSearch.vectorSearch(
TextSimilaritySearchRequest(query, topK = 10),
AssetRetrievable::class.java
)
processResults(results)
}
// Clear cache when switching to different asset view
assetSearch.clearEmbeddingCache()

import com.embabel.agent.rag.service.support.*
// Use in-memory repository for bounded datasets
val repository = InMemoryNamedEntityDataRepository(
dataDictionary = myDataDictionary,
embeddingService = myEmbeddingService
)
// Monitor size
if (repository.size > 10000) {
println("Warning: Repository size exceeds threshold")
// Consider clearing or archiving old data
repository.clear()
}

import com.embabel.agent.rag.service.support.*
val repository = InMemoryNamedEntityDataRepository(
dataDictionary = myDataDictionary,
embeddingService = myEmbeddingService
)
// Batch save operations
val entities = (1..1000).map { i ->
SimpleNamedEntityData(
id = "entity-$i",
name = "Entity $i",
description = "Description for entity $i"
)
}
// Save in batches for better performance
entities.chunked(100).forEach { batch ->
batch.forEach { repository.save(it) }
}

try {
val results = repository.vectorSearch(request, null, null)
processResults(results)
} catch (e: Exception) {
logger.error("Search failed", e)
// Fallback to text search
val textResults = repository.textSearch(request, null, null)
processResults(textResults)
} finally {
repository.clear()
}

// Define reusable configurations
val testConfig = DirectoryTextSearch.Config()
.withIncludedExtensions(setOf("txt"))
.withMaxResults(50)
val productionConfig = DirectoryTextSearch.Config()
.withIncludedExtensions(setOf("md", "txt", "adoc"))
.withMaxFileSize(20_971_520)
.withMaxResults(200)