Comprehensive developer toolkit providing reusable skills for Java/Spring Boot, TypeScript/NestJS/React/Next.js, Python, PHP, AWS CloudFormation, AI/RAG, DevOps, and more.
82
82%
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Risky
Do not use without reviewing
Complete API reference for implementing RAG systems with LangChain4j.
FileSystemDocumentLoader: Load from filesystem.
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
import java.nio.file.Path;
List<Document> documents = FileSystemDocumentLoader.loadDocuments("documents");
Document single = FileSystemDocumentLoader.loadDocument("document.pdf");

ClassPathDocumentLoader: Load from classpath resources.
List<Document> resources = ClassPathDocumentLoader.loadDocuments("documents");

UrlDocumentLoader: Load from web URLs.
Document webDoc = UrlDocumentLoader.load("https://example.com/doc.html", new TextDocumentParser());

interface DocumentSplitter {
List<TextSegment> split(Document document);
List<TextSegment> splitAll(List<Document> documents);
}

Recursive Split: Smart recursive splitting by paragraphs, sentences, words.
DocumentSplitter splitter = DocumentSplitters.recursive(
500, // Max segment size (tokens or characters)
50 // Overlap size
);
// With token counting
DocumentSplitter splitter = DocumentSplitters.recursive(
500,
50,
new OpenAiTokenCountEstimator("gpt-4o-mini")
);

Paragraph Split: Split by paragraphs.
DocumentSplitter splitter = new DocumentByParagraphSplitter(500, 50);

Sentence Split: Split by sentences.
DocumentSplitter splitter = new DocumentBySentenceSplitter(500, 50);

Line Split: Split by lines.
DocumentSplitter splitter = new DocumentByLineSplitter(500, 50);

public interface EmbeddingModel {
// Embed single text
Response<Embedding> embed(String text);
Response<Embedding> embed(TextSegment textSegment);
// Batch embedding
Response<List<Embedding>> embedAll(List<TextSegment> textSegments);
// Model dimension
int dimension();
}

EmbeddingModel model = OpenAiEmbeddingModel.builder()
.apiKey(System.getenv("OPENAI_API_KEY"))
.modelName("text-embedding-3-small") // or text-embedding-3-large
.dimensions(512) // Optional: reduce dimensions
.timeout(Duration.ofSeconds(30))
.logRequests(true)
.logResponses(true)
.build();

// Google Vertex AI
EmbeddingModel google = VertexAiEmbeddingModel.builder()
.project("PROJECT_ID")
.location("us-central1")
.modelName("textembedding-gecko")
.build();
// Ollama (local)
EmbeddingModel ollama = OllamaEmbeddingModel.builder()
.baseUrl("http://localhost:11434")
.modelName("all-minilm")
.build();
// AllMiniLmL6V2 (offline)
EmbeddingModel offline = new AllMiniLmL6V2EmbeddingModel();

public interface EmbeddingStore<Embedded> {
// Add embeddings
String add(Embedding embedding);
void add(String id, Embedding embedding);
String add(Embedding embedding, Embedded embedded);
List<String> addAll(List<Embedding> embeddings);
List<String> addAll(List<Embedding> embeddings, List<Embedded> embeddeds);
void addAll(List<String> ids, List<Embedding> embeddings, List<Embedded> embeddeds);
// Search embeddings
EmbeddingSearchResult<Embedded> search(EmbeddingSearchRequest request);
// Remove embeddings
void remove(String id);
void removeAll(Collection<String> ids);
void removeAll(Filter filter);
void removeAll();
}

EmbeddingStore<TextSegment> store = new InMemoryEmbeddingStore<>();
// Merge stores
InMemoryEmbeddingStore<TextSegment> merged = InMemoryEmbeddingStore.merge(
store1, store2, store3
);

EmbeddingStore<TextSegment> store = PineconeEmbeddingStore.builder()
.apiKey(System.getenv("PINECONE_API_KEY"))
.index("my-index")
.namespace("production")
.environment("gcp-starter") // or "aws-us-east-1"
.build();

EmbeddingStore<TextSegment> store = WeaviateEmbeddingStore.builder()
.host("localhost")
.port(8080)
.scheme("http")
.collectionName("Documents")
.build();

EmbeddingStore<TextSegment> store = QdrantEmbeddingStore.builder()
.host("localhost")
.port(6333)
.collectionName("documents")
.build();

EmbeddingStore<TextSegment> store = ChromaEmbeddingStore.builder()
.baseUrl("http://localhost:8000")
.collectionName("my-collection")
.build();

EmbeddingStore<TextSegment> store = Neo4jEmbeddingStore.builder()
.withBasicAuth("bolt://localhost:7687", "neo4j", "password")
.dimension(1536)
.label("Document")
.build();

EmbeddingStore<TextSegment> store = MongoDbEmbeddingStore.builder()
.databaseName("search")
.collectionName("documents")
.indexName("vector_index")
.createIndex(true)
.fromClient(mongoClient)
.build();

EmbeddingStore<TextSegment> store = PgVectorEmbeddingStore.builder()
.host("localhost")
.port(5432)
.database("embeddings")
.user("postgres")
.password("password")
.table("embeddings")
.createTableIfNotExists(true)
.build();

EmbeddingStore<TextSegment> store = MilvusEmbeddingStore.builder()
.host("localhost")
.port(19530)
.collectionName("documents")
.dimension(1536)
.build();

public class EmbeddingStoreIngestor {
public static Builder builder();
public IngestionResult ingest(Document document);
public IngestionResult ingest(Document... documents);
public IngestionResult ingest(Collection<Document> documents);
}

EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder()
// Document transformation
.documentTransformer(doc -> {
doc.metadata().put("source", "manual");
return doc;
})
// Document splitting strategy
.documentSplitter(DocumentSplitters.recursive(500, 50))
// Text segment transformation
.textSegmentTransformer(segment -> {
String enhanced = "Category: Spring\n" + segment.text();
return TextSegment.from(enhanced, segment.metadata());
})
// Embedding model (required)
.embeddingModel(embeddingModel)
// Embedding store (required)
.embeddingStore(embeddingStore)
.build();

IngestionResult result = ingestor.ingest(documents);
// Access results
TokenUsage usage = result.tokenUsage();
long totalTokens = usage.totalTokenCount();
long inputTokens = usage.inputTokenCount();

EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(embedding) // Required
.maxResults(5) // Default: 3
.minScore(0.7) // Threshold 0-1
.filter(new IsEqualTo("category", "tutorial"))
.build();

EmbeddingSearchResult<TextSegment> result = store.search(request);
List<EmbeddingMatch<TextSegment>> matches = result.matches();
for (EmbeddingMatch<TextSegment> match : matches) {
double score = match.score(); // Relevance 0-1
TextSegment segment = match.embedded(); // Retrieved content
String id = match.embeddingId(); // Store ID
}

public interface ContentRetriever {
List<Content> retrieve(Query query);
}

ContentRetriever retriever = EmbeddingStoreContentRetriever.builder()
.embeddingStore(embeddingStore)
.embeddingModel(embeddingModel)
// Static configuration
.maxResults(5)
.minScore(0.7)
// Dynamic configuration per query
.dynamicMaxResults(query -> 10)
.dynamicMinScore(query -> 0.8)
.dynamicFilter(query ->
new IsEqualTo("userId", extractUserId(query))
)
.build();

public interface RetrievalAugmentor {
AugmentationResult augment(AugmentationRequest augmentationRequest);
}

RetrievalAugmentor augmentor = DefaultRetrievalAugmentor.builder()
// Query transformation
.queryTransformer(new CompressingQueryTransformer(chatModel))
// Content retrieval
.contentRetriever(contentRetriever)
// Content aggregation and re-ranking
.contentAggregator(ReRankingContentAggregator.builder()
.scoringModel(scoringModel)
.minScore(0.8)
.build())
// Parallelization
.executor(customExecutor)
.build();

Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel)
.retrievalAugmentor(augmentor)
.build();

// Create from map
Metadata meta = Metadata.from(Map.of(
"userId", "user123",
"category", "tutorial",
"score", 0.95
));
// Add entries
meta.put("status", "active");
meta.put("version", 2);
// Retrieve entries
String userId = meta.getString("userId");
int version = meta.getInteger("version");
double score = meta.getDouble("score");
// Check existence
boolean has = meta.containsKey("userId");
// Remove entry
meta.remove("userId");
// Merge (returns a new Metadata; the two instances must not share any keys)
Metadata other = Metadata.from(Map.of("source", "db"));
Metadata combined = meta.merge(other);

import dev.langchain4j.store.embedding.filter.comparison.*;
import dev.langchain4j.store.embedding.filter.logical.*;
// Equality
Filter filter = new IsEqualTo("status", "active");
Filter filter = new IsNotEqualTo("deprecated", "true");
// Comparison
Filter filter = new IsGreaterThan("score", 0.8);
Filter filter = new IsLessThanOrEqualTo("daysOld", 30);
Filter filter = new IsGreaterThanOrEqualTo("priority", 5);
Filter filter = new IsLessThan("errorRate", 0.01);
// Membership
Filter filter = new IsIn("category", Arrays.asList("tech", "guide"));
Filter filter = new IsNotIn("status", Arrays.asList("archived"));
// String operations
Filter filter = new ContainsString("content", "Spring");
// Logical operations
Filter filter = new And(
new IsEqualTo("userId", "123"),
new IsGreaterThan("score", 0.7)
);
Filter filter = new Or(
new IsEqualTo("type", "doc"),
new IsEqualTo("type", "guide")
);
Filter filter = new Not(new IsEqualTo("archived", "true"));

// Text only
TextSegment segment = TextSegment.from("This is the content");
// With metadata
Metadata metadata = Metadata.from(Map.of("source", "docs"));
TextSegment segment = TextSegment.from("Content", metadata);
// Accessing
String text = segment.text();
Metadata meta = segment.metadata();

plugins
developer-kit-ai
skills
chunking-strategy
prompt-engineering
developer-kit-aws
skills
aws
aws-cli-beast
aws-cost-optimization
aws-drawio-architecture-diagrams
aws-sam-bootstrap
aws-cloudformation
aws-cloudformation-auto-scaling
references
aws-cloudformation-bedrock
references
aws-cloudformation-cloudfront
references
aws-cloudformation-cloudwatch
references
aws-cloudformation-dynamodb
references
aws-cloudformation-ec2
aws-cloudformation-ecs
references
aws-cloudformation-elasticache
aws-cloudformation-iam
references
aws-cloudformation-lambda
references
aws-cloudformation-rds
aws-cloudformation-s3
references
aws-cloudformation-security
references
aws-cloudformation-task-ecs-deploy-gh
aws-cloudformation-vpc
developer-kit-core
skills
developer-kit-java
skills
aws-lambda-java-integration
aws-rds-spring-boot-integration
aws-sdk-java-v2-bedrock
aws-sdk-java-v2-core
aws-sdk-java-v2-dynamodb
aws-sdk-java-v2-kms
aws-sdk-java-v2-lambda
aws-sdk-java-v2-messaging
aws-sdk-java-v2-rds
aws-sdk-java-v2-s3
aws-sdk-java-v2-secrets-manager
graalvm-native-image
langchain4j
langchain4j-mcp-server-patterns
langchain4j-ai-services-patterns
references
langchain4j-mcp-server-patterns
references
langchain4j-rag-implementation-patterns
references
langchain4j-spring-boot-integration
langchain4j-testing-strategies
langchain4j-tool-function-calling-patterns
langchain4j-vector-stores-configuration
references
qdrant
references
spring-ai-mcp-server-patterns
references
spring-boot-actuator
spring-boot-cache
spring-boot-crud-patterns
spring-boot-dependency-injection
spring-boot-event-driven-patterns
spring-boot-openapi-documentation
spring-boot-project-creator
spring-boot-resilience4j
spring-boot-rest-api-standards
spring-boot-saga-pattern
spring-boot-security-jwt
assets
references
scripts
spring-boot-test-patterns
spring-data-jpa
references
spring-data-neo4j
references
unit-test-application-events
unit-test-bean-validation
unit-test-boundary-conditions
unit-test-caching
unit-test-config-properties
unit-test-controller-layer
unit-test-exception-handler
unit-test-json-serialization
unit-test-mapper-converter
unit-test-parameterized
unit-test-scheduled-async
unit-test-service-layer
unit-test-utility-methods
unit-test-wiremock-rest-api
developer-kit-php
skills
aws-lambda-php-integration
developer-kit-python
skills
aws-lambda-python-integration
developer-kit-tools
developer-kit-typescript
skills
aws-lambda-typescript-integration
better-auth
drizzle-orm-patterns
dynamodb-toolbox-patterns
references
nestjs
nestjs-best-practices
nestjs-code-review
nestjs-drizzle-crud-generator
scripts
nextjs-app-router
nextjs-authentication
nextjs-code-review
nextjs-data-fetching
references
nextjs-deployment
nextjs-performance
nx-monorepo
react-code-review
react-patterns
references
shadcn-ui
tailwind-css-patterns
references
tailwind-design-system
references
turborepo-monorepo
typescript-docs
typescript-security-review
zod-validation-utilities