Discover and Export available Agent(s) as MCP Servers
Comprehensive guide to performance characteristics, optimization strategies, benchmarking, caching, and reactive backpressure handling in embabel-agent-mcpserver.
| Aspect | Synchronous Mode | Asynchronous Mode |
|---|---|---|
| Throughput (idle tools) | 500-1000 req/s | 10,000+ req/s |
| Throughput (I/O-bound) | Limited by threads | 50,000+ req/s |
| Latency (p50) | 10-50ms | 5-20ms |
| Latency (p99) | 100-500ms | 20-100ms |
| Memory per request | ~1MB (thread stack) | ~1KB (heap) |
| Thread count | 100-500 | 10-50 |
| CPU efficiency | Moderate | High |
| Scaling | Vertical | Horizontal + Vertical |
Characteristics:
Performance Profile: { .api }
// Typical sync mode configuration
server.tomcat.threads.max=200 // Max concurrent requests
server.tomcat.threads.min-spare=10 // Min idle threads
server.tomcat.accept-count=100 // Queue size
// Memory calculation:
// 200 threads × 1MB stack = 200MB thread overhead
// + ~300MB heap = ~500MB total

Best Use Cases:
Performance Example: { .api }
@Service
class SyncPerformancePublisher : McpExportToolCallbackPublisher {

    /** CPU-only tool: no I/O, so it suits the synchronous thread-per-request model. */
    private val calculateTool = object : ToolCallback {
        override fun getName() = "calculate"
        override fun getDescription() = "CPU-intensive calculation"
        override fun call(functionArguments: String): String =
            performCalculation(functionArguments).toString()
    }

    override val toolCallbacks = listOf(calculateTool)

    override fun infoString(verbose: Boolean?, indent: Int) = "SyncPerformancePublisher"

    /** Simulated CPU-bound work; the argument is currently unused. */
    private fun performCalculation(args: String): Long =
        (1..1000).sumOf { it.toLong() }
}
// Performance: ~50,000 req/s on 8-core machine

Characteristics:
Performance Profile: { .api }
// Typical async mode configuration
spring.reactor.netty.ioWorkerCount=10 // I/O threads (default: CPU cores)
spring.reactor.netty.ioSelectCount=1 // Selector threads
reactor.netty.pool.maxConnections=500 // Connection pool
// Memory calculation:
// 10 threads × 1MB stack = 10MB thread overhead
// + ~200MB heap = ~210MB total
// Handles 10,000+ concurrent connections

Best Use Cases:
Performance Example: { .api }
@Service
class AsyncPerformancePublisher(
    // Fix: the original example referenced `userRepository` without declaring
    // it anywhere, which would not compile; inject it via the constructor.
    private val userRepository: UserRepository
) : McpAsyncResourcePublisher {

    /** Exposes user data as a single non-blocking MCP resource. */
    override fun resources(): List<AsyncResourceSpecification> {
        return listOf(
            AsyncResourceSpecification(
                Resource(
                    "app://data/users",
                    "Users",
                    "User data from database",
                    "application/json",
                    null
                )
            ) { exchange, request ->
                // Non-blocking database query; boundedElastic keeps any
                // blocking driver work off the event-loop threads.
                userRepository.findAllAsync()
                    .subscribeOn(Schedulers.boundedElastic())
                    .map { users ->
                        ReadResourceResult(
                            listOf(
                                TextResourceContents(
                                    "app://data/users",
                                    "application/json",
                                    objectMapper.writeValueAsString(users)
                                )
                            )
                        )
                    }
            }
        )
    }

    override fun infoString(verbose: Boolean?, indent: Int) = "AsyncPerformancePublisher"

    companion object {
        // Single shared mapper: ObjectMapper is thread-safe and costly to build.
        private val objectMapper = ObjectMapper()
    }
}
// Performance: 20,000+ req/s for database queries

Choose Synchronous Mode When:
✓ Existing servlet-based application
✓ Simple tool implementations
✓ Low concurrent request count (< 500)
✓ Team unfamiliar with reactive programming
✓ CPU-bound workloads
✓ Quick prototyping/development

Choose Asynchronous Mode When:
✓ High concurrent request count (> 1000)
✓ I/O-bound workloads (database, HTTP, files)
✓ Reactive Spring application
✓ Memory-constrained environment
✓ Horizontal scaling requirements
✓ Non-blocking tool implementations

Synchronous Registration: { .api }
@Service
class BulkRegistrationService(
    private val serverStrategy: McpServerStrategy
) {
    /**
     * Registers [tools] one at a time, blocking on each registration
     * (appropriate for sync mode), and logs total plus per-tool timing.
     */
    fun registerTools(tools: List<ToolCallback>) {
        if (tools.isEmpty()) {
            // Fix: guard against division by zero in the average calculation.
            logger.info("No tools to register")
            return
        }
        val start = System.nanoTime()
        tools.forEach { tool ->
            serverStrategy.addToolCallback(tool)
                .block() // Blocking in sync mode
        }
        val duration = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start)
        logger.info("Registered ${tools.size} tools in ${duration}ms")
        logger.info("Average: ${duration / tools.size}ms per tool")
    }

    companion object {
        private val logger = LoggerFactory.getLogger(BulkRegistrationService::class.java)
    }
}
// Performance: ~50-100 microseconds per tool
// 1000 tools: 50-100ms

Parallel Registration (Async Mode): { .api }
@Service
class ParallelRegistrationService(
    private val serverStrategy: McpServerStrategy
) {
    /**
     * Registers [tools] concurrently via flatMap and completes when all
     * registrations finish; logs total and per-tool timing on success.
     */
    fun registerToolsParallel(tools: List<ToolCallback>): Mono<Void> {
        if (tools.isEmpty()) {
            // Fix: avoid division by zero in the average; nothing to do.
            return Mono.empty()
        }
        val start = System.nanoTime()
        return Flux.fromIterable(tools)
            .flatMap { tool ->
                serverStrategy.addToolCallback(tool)
                    .doOnSuccess {
                        logger.debug("Registered: ${tool.name}")
                    }
            }
            .then()
            .doOnSuccess {
                val duration = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start)
                logger.info("Registered ${tools.size} tools in ${duration}ms")
                logger.info("Average: ${duration / tools.size}ms per tool")
            }
    }

    companion object {
        private val logger = LoggerFactory.getLogger(ParallelRegistrationService::class.java)
    }
}
// Performance: ~20-30 microseconds per tool (parallel)
// 1000 tools: 20-30ms

Defer Expensive Tool Creation: { .api }
@Service
class LazyToolPublisher : McpExportToolCallbackPublisher {

    /** Tools are built on first access instead of at application startup. */
    override val toolCallbacks: List<ToolCallback> by lazy {
        logger.info("Initializing tools...")
        val startedAt = System.currentTimeMillis()
        val built = (1..100).map { createExpensiveTool(it) }
        val elapsed = System.currentTimeMillis() - startedAt
        logger.info("Initialized ${built.size} tools in ${elapsed}ms")
        built
    }

    override fun infoString(verbose: Boolean?, indent: Int) =
        "LazyToolPublisher: ${toolCallbacks.size} tools"

    /** Simulates slow per-tool construction (~10ms each). */
    private fun createExpensiveTool(index: Int): ToolCallback {
        Thread.sleep(10)
        return SimpleTool("tool_$index", "Tool $index")
    }

    companion object {
        private val logger = LoggerFactory.getLogger(LazyToolPublisher::class.java)
    }
}
// Without lazy: 1000ms delay on app startup
// With lazy: 0ms on startup, 1000ms on first access

Component Scanning Overhead: { .api }
// Spring component scanning is performed once at startup
// Typical overhead: 100-500ms for 10-50 publishers
// Optimize with explicit package scanning:
// Restricting @ComponentScan to known packages reduces startup scan time
// compared with scanning the whole classpath.
@SpringBootApplication
@ComponentScan(basePackages = [
    "com.example.publishers", // Only scan publisher packages
    "com.example.services"
])
class Application
// Further optimization with indexed components:
// Add to build.gradle.kts:
dependencies {
    // Fix: indexed component scanning is produced by spring-context-indexer,
    // which generates META-INF/spring.components at compile time.
    annotationProcessor("org.springframework:spring-context-indexer")
    // Generates configuration metadata only; it does not speed up scanning.
    annotationProcessor("org.springframework.boot:spring-boot-configuration-processor")
}
// Reduces scanning time by ~50%

Minimal Overhead: { .api }
@Service
class InvocationBenchmark(
    private val serverStrategy: McpServerStrategy,
    private val toolRegistry: ToolRegistry
) {
    /**
     * Measures [iterations] lookup + invoke cycles for [toolName] and logs
     * the total and average latency in microseconds.
     *
     * @throws IllegalArgumentException if [iterations] is not positive
     *         (also prevents division by zero in the average below).
     */
    fun benchmarkInvocation(toolName: String, iterations: Int) {
        require(iterations > 0) { "iterations must be positive, was $iterations" }
        val start = System.nanoTime()
        repeat(iterations) {
            toolRegistry.findToolCallback(toolName)
                .flatMap { tool ->
                    Mono.fromCallable {
                        tool.call("{}") // Minimal invocation
                    }
                }
                .block()
        }
        val duration = TimeUnit.NANOSECONDS.toMicros(System.nanoTime() - start)
        logger.info("$iterations invocations in ${duration}μs")
        logger.info("Average: ${duration / iterations}μs per invocation")
    }

    companion object {
        private val logger = LoggerFactory.getLogger(InvocationBenchmark::class.java)
    }
}
// Performance: 2-5 microseconds overhead per invocation
// Actual tool execution time adds to this

Registry Lookup Optimization: { .api }
class OptimizedToolRegistry : ToolRegistry {
    // Primary store: ConcurrentHashMap gives O(1) exact-name lookup.
    private val tools = ConcurrentHashMap<String, ToolCallback>()

    // Secondary index: prefix (name up to the first '_') -> tools with it.
    private val prefixIndex = ConcurrentHashMap<String, List<ToolCallback>>()

    override fun findToolCallback(toolName: String): Mono<ToolCallback> {
        return Mono.justOrEmpty(tools[toolName])
            .switchIfEmpty(Mono.error(ToolNotFoundException(toolName)))
    }

    override fun listToolCallbacks(): Mono<List<ToolCallback>> {
        return Mono.just(tools.values.toList())
    }

    /** Indexed prefix query, falling back to a linear scan on index miss. */
    fun findByPrefix(prefix: String): Mono<List<ToolCallback>> {
        return Mono.justOrEmpty(prefixIndex[prefix])
            .switchIfEmpty(
                Mono.just(
                    tools.values.filter { it.name.startsWith(prefix) }
                )
            )
    }

    override fun register(toolCallback: ToolCallback): Mono<Void> {
        return Mono.fromRunnable {
            tools[toolCallback.name] = toolCallback
            val prefix = toolCallback.name.substringBefore('_')
            prefixIndex.compute(prefix) { _, existing ->
                // Fix: drop any stale entry with the same name first, so
                // re-registering a tool does not leave duplicates in the index.
                existing.orEmpty().filterNot { it.name == toolCallback.name } + toolCallback
            }
        }
    }
}
// Performance:
// - Exact lookup: 100-200 nanoseconds
// - Prefix lookup (indexed): 1-2 microseconds
// - List all: 1-5 microseconds (depends on count)

Optimize JSON Serialization: { .api }
@Service
class OptimizedSerializationPublisher : McpExportToolCallbackPublisher {
    // Reuse a single ObjectMapper: it is thread-safe and expensive to create.
    private val objectMapper = ObjectMapper().apply {
        // Skip pretty-printing on output.
        disable(SerializationFeature.INDENT_OUTPUT)
        // Tolerate extra input fields instead of failing deserialization.
        disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES)
        // Fixed, simple date format.
        setDateFormat(SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"))
        // NOTE(review): this flag relaxes JSON strictness (accepts unquoted
        // field names); it is a leniency setting, not a performance switch —
        // confirm it is actually wanted here.
        configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true)
    }

    private val fastTool = object : ToolCallback {
        override fun getName() = "fast_serialization"
        override fun getDescription() = "Fast JSON serialization"
        override fun call(functionArguments: String): String {
            // Parse input
            val input = objectMapper.readValue(functionArguments, InputType::class.java)
            // Process
            val result = process(input)
            // Serialize output with the shared mapper
            return objectMapper.writeValueAsString(result)
        }
    }

    override val toolCallbacks = listOf(fastTool)

    override fun infoString(verbose: Boolean?, indent: Int) = "OptimizedSerializationPublisher"

    // Placeholder business logic for the example.
    private fun process(input: InputType): OutputType = OutputType("result")

    data class InputType(val data: String)
    data class OutputType(val result: String)
}
// Performance improvement: 30-40% faster than default ObjectMapper

In-Memory Cache: { .api }
@Service
class CachedToolPublisher : McpExportToolCallbackPublisher {
    // Bounded Caffeine cache with a 5-minute TTL; recordStats() enables the
    // statistics exposed by getCacheStats().
    private val cache = Caffeine.newBuilder()
        .maximumSize(1000)
        .expireAfterWrite(5, TimeUnit.MINUTES)
        .recordStats()
        .build<String, String>()

    private val expensiveTool = object : ToolCallback {
        override fun getName() = "expensive_operation"
        override fun getDescription() = "Expensive operation with caching"
        override fun call(functionArguments: String): String {
            // NOTE(review): the key uses the raw argument string, so
            // semantically-equal JSON with different key order or whitespace
            // will miss the cache — confirm this is acceptable.
            val cacheKey = "tool:${getName()}:$functionArguments"
            return cache.get(cacheKey) { key ->
                logger.info("Cache miss for: $key")
                performExpensiveOperation(functionArguments)
            }!!
        }
    }

    override val toolCallbacks = listOf(expensiveTool)

    override fun infoString(verbose: Boolean?, indent: Int) = "CachedToolPublisher"

    // Simulated expensive operation (~100ms).
    private fun performExpensiveOperation(args: String): String {
        Thread.sleep(100)
        return "result"
    }

    /** Human-readable snapshot of request/hit/miss/eviction statistics. */
    fun getCacheStats(): String {
        val stats = cache.stats()
        return """
        Cache Stats:
        - Requests: ${stats.requestCount()}
        - Hits: ${stats.hitCount()} (${stats.hitRate() * 100}%)
        - Misses: ${stats.missCount()}
        - Evictions: ${stats.evictionCount()}
        """.trimIndent()
    }

    companion object {
        private val logger = LoggerFactory.getLogger(CachedToolPublisher::class.java)
    }
}
// Performance:
// - Cache hit: < 1 microsecond
// - Cache miss: 100ms (operation time)
// - 95%+ hit rate: 20x speedup

Static Resource Caching: { .api }
@Service
class CachedResourcePublisher : McpResourcePublisher {

    /** Loaded contents keyed by resource URI; populated on first read. */
    private val resourceCache = ConcurrentHashMap<String, String>()

    override fun resources(): List<SyncResourceSpecification> =
        listOf(createCachedResource("app://docs/large-file", "LargeFile"))

    /** Builds a resource spec whose loader memoizes the expensive file read. */
    private fun createCachedResource(uri: String, name: String): SyncResourceSpecification =
        SyncResourceSpecificationFactory.syncResourceSpecification(
            uri = uri,
            name = name,
            description = "Cached large file",
            resourceLoader = { exchange ->
                resourceCache.computeIfAbsent(uri) {
                    logger.info("Loading resource: $uri")
                    loadLargeFile()
                }
            },
            mimeType = "text/plain"
        )

    override fun infoString(verbose: Boolean?, indent: Int) = "CachedResourcePublisher"

    /** Simulates an expensive (~500ms) file load. */
    private fun loadLargeFile(): String {
        Thread.sleep(500)
        return "file content..."
    }

    companion object {
        private val logger = LoggerFactory.getLogger(CachedResourcePublisher::class.java)
    }
}
// Performance:
// - First load: 500ms
// - Subsequent loads: < 1ms

Cache Publisher Initialization: { .api }
@Service
class CachedPublisherInit : McpExportToolCallbackPublisher {

    // Fix: @Cacheable targets methods, so on a Kotlin property it must use the
    // @get: use-site target to land on the generated getter; otherwise Spring's
    // caching proxy never sees it and the expensive body runs on every access.
    // NOTE(review): calls from inside this class (e.g. infoString below) bypass
    // the proxy and will re-run the getter; only external callers are cached.
    @get:Cacheable("publisher-tools")
    override val toolCallbacks: List<ToolCallback>
        get() {
            logger.info("Computing tools (expensive)...")
            return (1..100).map { index ->
                Thread.sleep(10) // Expensive per-tool init
                SimpleTool("tool_$index", "Tool $index")
            }
        }

    override fun infoString(verbose: Boolean?, indent: Int) =
        "CachedPublisherInit: ${toolCallbacks.size} tools"

    companion object {
        private val logger = LoggerFactory.getLogger(CachedPublisherInit::class.java)
    }
}
// Configuration
@Configuration
@EnableCaching
class CacheConfig {
    /** Caffeine-backed manager for the "publisher-tools" cache (size/TTL bounded). */
    @Bean
    fun cacheManager(): CacheManager {
        return CaffeineCacheManager("publisher-tools").apply {
            setCaffeine(Caffeine.newBuilder()
                .maximumSize(100)
                .expireAfterWrite(1, TimeUnit.HOURS))
        }
    }
}

Backpressure Scenario:
Tool produces data faster than client consumes
↓
Memory buffers grow unbounded
↓
OutOfMemoryError

Buffer Strategy: { .api }
@Service
class BackpressurePublisher : McpAsyncResourcePublisher {
    override fun resources(): List<AsyncResourceSpecification> {
        return listOf(
            createStreamingResource()
        )
    }

    /** Builds a streaming resource demonstrating a bounded backpressure buffer. */
    private fun createStreamingResource(): AsyncResourceSpecification {
        return AsyncResourceSpecification(
            Resource(
                "app://stream/data",
                "StreamingData",
                "Streaming data with backpressure",
                "application/json",
                null
            )
        ) { exchange, request ->
            Flux.range(1, 1000000) // Large dataset
                .map { index ->
                    // Simulated data generation
                    "data-$index"
                }
                .onBackpressureBuffer(
                    1000, // Buffer up to 1000 items
                    BufferOverflowStrategy.DROP_OLDEST // Drop old items if full
                )
                // NOTE(review): collectList() re-buffers the entire remaining
                // stream in memory, which largely defeats the bounded buffer
                // above — confirm whether a truly streaming response is intended.
                .collectList()
                .map { items ->
                    ReadResourceResult(
                        listOf(
                            TextResourceContents(
                                "app://stream/data",
                                "application/json",
                                objectMapper.writeValueAsString(items)
                            )
                        )
                    )
                }
        }
    }

    override fun infoString(verbose: Boolean?, indent: Int) = "BackpressurePublisher"

    companion object {
        // Shared, thread-safe mapper.
        private val objectMapper = ObjectMapper()
    }
}

Throttling Strategy: { .api }
@Service
class ThrottledPublisher : McpAsyncResourcePublisher {
    override fun resources(): List<AsyncResourceSpecification> {
        return listOf(
            createThrottledResource()
        )
    }

    /** Builds a resource whose stream is throttled to ~100 items/second. */
    private fun createThrottledResource(): AsyncResourceSpecification {
        return AsyncResourceSpecification(
            Resource(
                "app://throttled/data",
                "ThrottledData",
                "Data with rate limiting",
                "application/json",
                null
            )
        ) { exchange, request ->
            Flux.range(1, 1000)
                .delayElements(Duration.ofMillis(10)) // Max 100 items/second
                .limitRate(100) // Request 100 items upstream at a time
                .collectList()
                .map { items ->
                    ReadResourceResult(
                        listOf(
                            TextResourceContents(
                                "app://throttled/data",
                                "application/json",
                                objectMapper.writeValueAsString(items)
                            )
                        )
                    )
                }
        }
    }

    override fun infoString(verbose: Boolean?, indent: Int) = "ThrottledPublisher"

    companion object {
        // Shared, thread-safe mapper.
        private val objectMapper = ObjectMapper()
    }
}

Concurrent Request Limiter: { .api }
@Service
class RateLimitedPublisher : McpExportToolCallbackPublisher {
    // Permits bound the number of tool executions running at once.
    private val semaphore = Semaphore(10) // Max 10 concurrent executions

    private val limitedTool = object : ToolCallback {
        override fun getName() = "rate_limited_tool"
        override fun getDescription() = "Tool with concurrency limits"
        override fun call(functionArguments: String): String {
            // Wait up to 100ms for a permit, then fail fast.
            // NOTE(review): tryAcquire can throw InterruptedException, which
            // propagates to the caller — confirm that is the desired contract.
            if (!semaphore.tryAcquire(100, TimeUnit.MILLISECONDS)) {
                throw RateLimitException("Too many concurrent requests")
            }
            try {
                return performOperation(functionArguments)
            } finally {
                // Always release, even if the operation throws.
                semaphore.release()
            }
        }
    }

    override val toolCallbacks = listOf(limitedTool)

    override fun infoString(verbose: Boolean?, indent: Int) = "RateLimitedPublisher"

    // Simulated expensive operation (~100ms).
    private fun performOperation(args: String): String {
        Thread.sleep(100)
        return "result"
    }

    /** Thrown when no execution permit becomes available within the timeout. */
    class RateLimitException(message: String) : RuntimeException(message)
}

Memory-Efficient Tool Storage: { .api }
class CompactToolRegistry : ToolRegistry {
    // Store only lightweight metadata rather than full tool instances.
    private val toolMetadata = ConcurrentHashMap<String, ToolMetadata>()
    private val toolFactory: ToolFactory = DefaultToolFactory()

    override fun register(toolCallback: ToolCallback): Mono<Void> {
        return Mono.fromRunnable {
            val metadata = ToolMetadata(
                name = toolCallback.name,
                description = toolCallback.description,
                schema = toolCallback.inputTypeSchema
            )
            toolMetadata[toolCallback.name] = metadata
        }
    }

    override fun findToolCallback(toolName: String): Mono<ToolCallback> {
        return Mono.fromCallable {
            val metadata = toolMetadata[toolName]
                ?: throw ToolNotFoundException(toolName)
            // Recreate the tool on demand from its stored metadata.
            // NOTE(review): this trades lookup CPU for memory — every lookup
            // builds a fresh instance; confirm tools are cheap to recreate.
            toolFactory.createTool(metadata)
        }
    }

    /** Minimal data retained per registered tool. */
    data class ToolMetadata(
        val name: String,
        val description: String,
        val schema: String?
    )
}
// Memory savings: 50-70% for large tool counts

Tool Callback Pooling: { .api }
@Service
class PooledToolPublisher : McpExportToolCallbackPublisher {
    // Commons-Pool of reusable, expensive-to-create tool instances.
    private val toolPool = object : GenericObjectPool<ExpensiveTool>(
        object : BasePooledObjectFactory<ExpensiveTool>() {
            override fun create() = ExpensiveTool()
            override fun wrap(obj: ExpensiveTool) = DefaultPooledObject(obj)
        }
    ) {
        init {
            maxTotal = 50
            maxIdle = 10
            minIdle = 5
        }
    }

    private val pooledToolCallback = object : ToolCallback {
        override fun getName() = "pooled_tool"
        override fun getDescription() = "Tool with object pooling"
        override fun call(functionArguments: String): String {
            // NOTE(review): borrowObject blocks when the pool is exhausted —
            // confirm whether a borrow timeout should be configured.
            val tool = toolPool.borrowObject()
            try {
                return tool.execute(functionArguments)
            } finally {
                // Always return the instance to the pool, even on failure.
                toolPool.returnObject(tool)
            }
        }
    }

    override val toolCallbacks = listOf(pooledToolCallback)

    override fun infoString(verbose: Boolean?, indent: Int) = "PooledToolPublisher"

    /** Expensive to create (holds a 1MB buffer) but safe to reuse. */
    class ExpensiveTool {
        private val buffer = ByteArray(1024 * 1024) // 1MB buffer
        fun execute(args: String): String {
            // Use buffer for processing
            return "result"
        }
    }
}

Stateless Server Design: { .api }
// All state is externalized (Redis for data, Micrometer for metrics) so any
// instance can serve any request — a prerequisite for horizontal scaling.
@Service
class StatelessPublisher(
    private val redisTemplate: RedisTemplate<String, String>,
    private val metricsRegistry: MeterRegistry
) : McpExportToolCallbackPublisher {
    private val statelessTool = object : ToolCallback {
        override fun getName() = "stateless_tool"
        override fun getDescription() = "Stateless tool for horizontal scaling"
        override fun call(functionArguments: String): String {
            // Get state from Redis (shared across instances)
            val state = redisTemplate.opsForValue().get("tool-state")
            // Process with external state
            val result = process(functionArguments, state)
            // NOTE(review): this get-then-set on a single shared key is not
            // atomic; concurrent instances can lose updates — consider a Redis
            // transaction or compare-and-set pattern.
            redisTemplate.opsForValue().set("tool-state", result)
            // Record metrics (centralized)
            metricsRegistry.counter("tool.invocations", "tool", getName()).increment()
            return result
        }
    }

    override val toolCallbacks = listOf(statelessTool)

    override fun infoString(verbose: Boolean?, indent: Int) = "StatelessPublisher"

    // Placeholder business logic for the example.
    private fun process(args: String, state: String?): String {
        return "processed"
    }
}
// Scaling: 10 instances handle 100,000+ req/s

Resource Allocation: { .api }
# JVM Heap Configuration
-Xms2g # Initial heap
-Xmx4g # Maximum heap
-XX:MetaspaceSize=256m # Metaspace
-XX:MaxMetaspaceSize=512m
# GC Configuration (G1GC recommended)
-XX:+UseG1GC
-XX:MaxGCPauseMillis=200
-XX:ParallelGCThreads=8
-XX:ConcGCThreads=2
# Thread Configuration
server.tomcat.threads.max=400 # Sync mode
spring.reactor.netty.ioWorkerCount=16 # Async mode

Micrometer Integration: { .api }
@Service
class MetricsPublisher(
    private val meterRegistry: MeterRegistry
) : McpExportToolCallbackPublisher {
    private val instrumentedTool = object : ToolCallback {
        override fun getName() = "instrumented_tool"
        override fun getDescription() = "Tool with performance metrics"
        override fun call(functionArguments: String): String {
            // Record invocation count
            meterRegistry.counter("tool.invocations", "tool", getName()).increment()
            // Time the operation: recordCallable both measures latency and
            // returns the operation's result.
            return Timer.builder("tool.execution.time")
                .tag("tool", getName())
                .register(meterRegistry)
                .recordCallable {
                    performOperation(functionArguments)
                }!!
        }
    }

    override val toolCallbacks = listOf(instrumentedTool)

    override fun infoString(verbose: Boolean?, indent: Int) = "MetricsPublisher"

    // Simulated work with randomized 10-100ms latency.
    private fun performOperation(args: String): String {
        Thread.sleep(Random.nextLong(10, 100))
        return "result"
    }
}
// Metrics available at /actuator/metrics

Custom Health Indicator: { .api }
@Component
class McpServerHealthIndicator(
    private val serverStrategy: McpServerStrategy,
    private val toolRegistry: ToolRegistry
) : HealthIndicator {
    /**
     * Reports UP (with server name, mode, and tool count) when at least one
     * tool is registered; DOWN when the registry is empty or any probe fails.
     * Each reactive probe is bounded by a 1-second block timeout.
     */
    override fun health(): Health {
        return try {
            val serverInfo = serverStrategy.getServerInfo().block(Duration.ofSeconds(1))
            val toolCount = toolRegistry.listToolCallbacks()
                .map { it.size }
                .block(Duration.ofSeconds(1))
            if (toolCount == null || toolCount == 0) {
                Health.down()
                    .withDetail("reason", "No tools registered")
                    .build()
            } else {
                Health.up()
                    .withDetail("server", serverInfo?.name)
                    .withDetail("mode", serverInfo?.executionMode)
                    .withDetail("tools", toolCount)
                    .build()
            }
        } catch (e: Exception) {
            // Any probe failure (timeout included) is surfaced as DOWN.
            Health.down()
                .withException(e)
                .build()
        }
    }
}

Async Mode Tuning: { .api }
# Reactor Netty Configuration
spring.reactor.netty.ioWorkerCount=16
spring.reactor.netty.ioSelectCount=2
reactor.netty.pool.maxConnections=1000
reactor.netty.pool.acquireTimeout=45000
reactor.netty.pool.maxIdleTime=10000
# Backpressure
reactor.bufferSize.small=256
reactor.bufferSize.x=512
# Schedulers
reactor.schedulers.defaultPoolSize=10
reactor.schedulers.defaultQueueSize=100000

Sync Mode Tuning: { .api }
# Tomcat Configuration
server.tomcat.threads.max=500
server.tomcat.threads.min-spare=25
server.tomcat.accept-count=200
server.tomcat.max-connections=10000
server.tomcat.connection-timeout=20000
# Connection pool (for I/O operations)
spring.datasource.hikari.maximum-pool-size=50
spring.datasource.hikari.minimum-idle=10
spring.datasource.hikari.connection-timeout=30000

JMH Benchmark: { .api }
@State(Scope.Benchmark)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
@Warmup(iterations = 3, time = 5)
@Measurement(iterations = 5, time = 10)
@Fork(1)
open class ToolInvocationBenchmark {
    private lateinit var registry: ToolRegistry
    private lateinit var tool: ToolCallback

    /** Registers a single tool before measurement begins. */
    @Setup
    fun setup() {
        registry = InMemoryToolRegistry()
        tool = SimpleTool("test", "Test tool")
        registry.register(tool).block()
    }

    /** Measures registry lookup in isolation. */
    @Benchmark
    fun benchmarkToolLookup(): ToolCallback? {
        return registry.findToolCallback("test").block()
    }

    /** Measures tool invocation in isolation. */
    @Benchmark
    fun benchmarkToolInvocation(): String? {
        return tool.call("{}")
    }

    /** Measures the combined lookup + invoke path a real request takes. */
    @Benchmark
    fun benchmarkFullCycle(): String? {
        return registry.findToolCallback("test")
            .map { it.call("{}") }
            .block()
    }
}
// Results (example):
// benchmarkToolLookup: 5,000,000 ops/s (200ns/op)
// benchmarkToolInvocation: 100,000 ops/s (10μs/op)
// benchmarkFullCycle: 90,000 ops/s (11μs/op)