LangChain AWS integration providing chat models, embeddings, and retrievers for seamless AWS service connections.
Retrieve relevant documents from AWS services for RAG applications and knowledge search workflows using Amazon Kendra's intelligent search capabilities and Amazon Bedrock Knowledge Bases for vector-based retrieval.
Intelligent document retrieval using Amazon Kendra search service with support for semantic search, attribute filtering, and automatic fallback between API methods.
/**
* Retriever for Amazon Kendra intelligent search service
*/
class AmazonKendraRetriever extends BaseRetriever {
constructor(args: AmazonKendraRetrieverArgs);
/** Main method to retrieve relevant documents for a query */
getRelevantDocuments(query: string): Promise<Document[]>;
/** Query Kendra with Retrieve API and fallback to Query API */
queryKendra(query: string, topK: number, attributeFilter?: AttributeFilter): Promise<Document[]>;
/** Combine title and excerpt into single text */
combineText(title?: string, excerpt?: string): string;
/** Clean result text by removing extra whitespace */
cleanResult(text: string): string;
/** Extract document attributes from Kendra response */
getDocAttributes(attributes?: DocumentAttribute[]): Record<string, any>;
}
Usage Examples:
import { AmazonKendraRetriever } from "@langchain/aws";
// Basic initialization
const kendraRetriever = new AmazonKendraRetriever({
indexId: "your-kendra-index-id",
topK: 10,
region: "us-east-1",
clientOptions: {
credentials: {
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY
}
}
});
// Retrieve documents
const documents = await kendraRetriever.getRelevantDocuments(
"How do I configure SSL certificates?"
);
documents.forEach((doc, index) => {
console.log(`Document ${index + 1}:`);
console.log(`Content: ${doc.pageContent}`);
console.log(`Source: ${doc.metadata.source}`);
console.log(`Title: ${doc.metadata.title}`);
console.log(`---`);
});
Vector-based document retrieval using Amazon Bedrock Knowledge Bases for RAG workflows with support for hybrid search and advanced filtering.
/**
* Retriever for Amazon Bedrock Knowledge Bases RAG workflow
*/
class AmazonKnowledgeBaseRetriever extends BaseRetriever {
constructor(args: AmazonKnowledgeBaseRetrieverArgs);
/** Main method to retrieve relevant documents for a query */
getRelevantDocuments(query: string): Promise<Document[]>;
/** Query knowledge base directly with advanced parameters */
queryKnowledgeBase(query: string, topK: number, filter?: RetrievalFilter, overrideSearchType?: SearchType): Promise<Document[]>;
/** Clean result text by normalizing whitespace and removing ellipses */
cleanResult(text: string): string;
}
Usage Examples:
import { AmazonKnowledgeBaseRetriever } from "@langchain/aws";
// Basic initialization
const kbRetriever = new AmazonKnowledgeBaseRetriever({
knowledgeBaseId: "your-knowledge-base-id",
topK: 5,
region: "us-east-1",
clientOptions: {
credentials: {
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY
}
}
});
// Retrieve documents with hybrid search
const hybridRetriever = new AmazonKnowledgeBaseRetriever({
knowledgeBaseId: "your-knowledge-base-id",
topK: 8,
region: "us-east-1",
overrideSearchType: "HYBRID" // or "SEMANTIC"
});
const documents = await hybridRetriever.getRelevantDocuments(
"What are the security best practices for cloud deployment?"
);interface AmazonKendraRetrieverArgs {
/** Amazon Kendra index identifier */
indexId: string;
/** Maximum number of documents to retrieve */
topK: number;
/** AWS region where the Kendra index is located */
region: string;
/** Optional attribute filter for refined search */
attributeFilter?: AttributeFilter;
/** Optional Kendra client configuration */
clientOptions?: KendraClientConfig;
}interface AmazonKnowledgeBaseRetrieverArgs {
/** Amazon Bedrock Knowledge Base identifier */
knowledgeBaseId: string;
/** Maximum number of documents to retrieve */
topK: number;
/** AWS region where the Knowledge Base is located */
region: string;
/** Optional Bedrock Agent Runtime client configuration */
clientOptions?: BedrockAgentRuntimeClientConfig;
/** Optional retrieval filter for refined search */
filter?: RetrievalFilter;
/** Override search type (HYBRID or SEMANTIC) */
overrideSearchType?: SearchType;
}
Amazon Kendra supports sophisticated attribute-based filtering for precise document retrieval.
type AttributeFilter = {
AndAllFilters?: AttributeFilter[];
OrAllFilters?: AttributeFilter[];
NotFilter?: AttributeFilter;
EqualsTo?: DocumentAttribute;
ContainsAll?: DocumentAttribute;
ContainsAny?: DocumentAttribute;
GreaterThan?: DocumentAttribute;
GreaterThanOrEquals?: DocumentAttribute;
LessThan?: DocumentAttribute;
LessThanOrEquals?: DocumentAttribute;
};
Usage Examples:
// Filter by document source and date
const kendraWithFilters = new AmazonKendraRetriever({
indexId: "your-index-id",
topK: 10,
region: "us-east-1",
attributeFilter: {
AndAllFilters: [
{
EqualsTo: {
Key: "_source_uri",
Value: { StringValue: "https://docs.example.com" }
}
},
{
GreaterThanOrEquals: {
Key: "_last_updated_at",
Value: { DateValue: new Date("2024-01-01") }
}
}
]
}
});
// Filter by category with OR logic
const categoryFilter = new AmazonKendraRetriever({
indexId: "your-index-id",
topK: 15,
region: "us-east-1",
attributeFilter: {
OrAllFilters: [
{
EqualsTo: {
Key: "category",
Value: { StringValue: "documentation" }
}
},
{
EqualsTo: {
Key: "category",
Value: { StringValue: "tutorial" }
}
}
]
}
});
const docs = await categoryFilter.getRelevantDocuments("API authentication");
Amazon Bedrock Knowledge Bases support metadata-based filtering for targeted retrieval.
type RetrievalFilter = {
equals?: {
key: string;
value: string | number | boolean;
};
notEquals?: {
key: string;
value: string | number | boolean;
};
lessThan?: {
key: string;
value: number;
};
lessThanOrEquals?: {
key: string;
value: number;
};
greaterThan?: {
key: string;
value: number;
};
greaterThanOrEquals?: {
key: string;
value: number;
};
in?: {
key: string;
value: (string | number | boolean)[];
};
notIn?: {
key: string;
value: (string | number | boolean)[];
};
startsWith?: {
key: string;
value: string;
};
listContains?: {
key: string;
value: string | number | boolean;
};
stringContains?: {
key: string;
value: string;
};
andAll?: RetrievalFilter[];
orAll?: RetrievalFilter[];
};
Usage Examples:
// Filter by document type and recency
const kbWithFilters = new AmazonKnowledgeBaseRetriever({
knowledgeBaseId: "your-kb-id",
topK: 10,
region: "us-east-1",
filter: {
andAll: [
{
equals: {
key: "document_type",
value: "user_guide"
}
},
{
greaterThan: {
key: "publish_date",
value: 20240101
}
}
]
}
});
// Filter by multiple categories
const multiCategoryKB = new AmazonKnowledgeBaseRetriever({
knowledgeBaseId: "your-kb-id",
topK: 8,
region: "us-east-1",
filter: {
in: {
key: "category",
value: ["api", "security", "deployment"]
}
}
});
const securityDocs = await multiCategoryKB.getRelevantDocuments(
"How to implement OAuth2 authentication?"
);
Amazon Kendra automatically uses intelligent search combining keyword and semantic understanding.
Features:
Amazon Bedrock Knowledge Bases support different search strategies.
type SearchType = "HYBRID" | "SEMANTIC";
Usage Examples:
// Semantic search - pure vector similarity
const semanticRetriever = new AmazonKnowledgeBaseRetriever({
knowledgeBaseId: "your-kb-id",
topK: 10,
region: "us-east-1",
overrideSearchType: "SEMANTIC"
});
// Hybrid search - combines vector similarity with keyword matching
const hybridRetriever = new AmazonKnowledgeBaseRetriever({
knowledgeBaseId: "your-kb-id",
topK: 10,
region: "us-east-1",
overrideSearchType: "HYBRID"
});
// Compare search strategies
const query = "microservices architecture patterns";
const semanticResults = await semanticRetriever.getRelevantDocuments(query);
const hybridResults = await hybridRetriever.getRelevantDocuments(query);
console.log("Semantic results:", semanticResults.length);
console.log("Hybrid results:", hybridResults.length);
Both retrievers return rich metadata alongside document content for enhanced RAG applications.
// Example Kendra document metadata
{
pageContent: "Document content text...",
metadata: {
source: "https://docs.example.com/guide.html",
title: "Configuration Guide",
excerpt: "Brief excerpt from the document...",
document_attributes: {
category: "documentation",
last_updated: "2024-03-15",
author: "Tech Writing Team"
}
}
}// Example Knowledge Base document metadata
{
pageContent: "Document content text...",
metadata: {
source: "s3://my-bucket/docs/deployment-guide.pdf",
location: {
s3Location: {
uri: "s3://my-bucket/docs/deployment-guide.pdf"
},
type: "S3"
},
score: 0.85,
metadata: {
document_type: "user_guide",
version: "2.1",
publish_date: 20240315
}
}
}
Common patterns for integrating retrievers with RAG (Retrieval-Augmented Generation) workflows.
Usage Examples:
import { ChatBedrockConverse } from "@langchain/aws";
import { HumanMessage } from "@langchain/core/messages";
// Create RAG chain with retriever
const retriever = new AmazonKnowledgeBaseRetriever({
knowledgeBaseId: "your-kb-id",
topK: 5,
region: "us-east-1"
});
const chatModel = new ChatBedrockConverse({
region: "us-east-1",
model: "anthropic.claude-3-5-sonnet-20240620-v1:0"
});
/**
 * Answers a question with a simple retrieve-then-generate flow.
 *
 * Documents for the question are fetched from the surrounding `retriever`,
 * numbered into a context section, and sent together with the question as a
 * single human message to the surrounding `chatModel`.
 *
 * @param question - Natural-language question; used both as the retrieval
 *   query and inside the prompt.
 * @returns The model reply as plain text. If the reply content is a list of
 *   content parts instead of a string, the text parts are concatenated in
 *   order and every other part is skipped.
 */
async function answerWithRAG(question: string): Promise<string> {
  // Retrieve relevant documents
  const relevantDocs = await retriever.getRelevantDocuments(question);

  // Format context from retrieved documents
  const context = relevantDocs
    .map((doc, index) => `Document ${index + 1}: ${doc.pageContent}`)
    .join("\n\n");

  // Generate answer using retrieved context.
  // The continuation lines stay at column 0 so no indentation leaks into the prompt.
  const prompt = `Based on the following context, answer the question.
Context:
${context}
Question: ${question}
Answer:`;

  const response = await chatModel.invoke([new HumanMessage(prompt)]);

  // Message content is not guaranteed to be a string, so narrow at runtime
  // instead of asserting the type.
  const { content } = response;
  if (typeof content === "string") {
    return content;
  }
  return content
    .map((part) =>
      (part.type === "text" && typeof part.text === "string") ? part.text : ""
    )
    .join("");
}
// Use the RAG function
const answer = await answerWithRAG(
"What are the recommended security practices for API deployment?"
);
console.log(answer);
Comprehensive error handling patterns for production deployments.
Usage Examples:
/**
 * Retrieves documents from a Kendra index and degrades to an empty result
 * instead of throwing.
 *
 * Failures are logged with a hint chosen by the error's `name`
 * ("ResourceNotFoundException", "AccessDeniedException",
 * "ThrottlingException"); anything else is logged generically.
 *
 * @param query - Search text passed to the retriever.
 * @returns The retrieved documents, or an empty array when nothing matched or
 *   the retrieval failed.
 */
async function robustRetrieval(query: string) {
  const retriever = new AmazonKendraRetriever({
    indexId: "your-index-id",
    topK: 10,
    region: "us-east-1"
  });
  try {
    const documents = await retriever.getRelevantDocuments(query);
    if (documents.length === 0) {
      console.log("No relevant documents found, try rephrasing your query");
      return [];
    }
    return documents;
  } catch (error: unknown) {
    // A caught value is `unknown`: anything can be thrown, including null.
    // Narrow before touching `.name` / `.message` so the handler itself cannot throw.
    const errorName = error instanceof Error ? error.name : undefined;
    if (errorName === "ResourceNotFoundException") {
      console.error("Kendra index not found - check index ID and region");
    } else if (errorName === "AccessDeniedException") {
      console.error("Access denied - check IAM permissions for Kendra");
    } else if (errorName === "ThrottlingException") {
      console.error("Rate limited - implement exponential backoff");
      // Implement retry logic here
    } else {
      console.error(
        "Retrieval failed:",
        error instanceof Error ? error.message : String(error)
      );
    }
    return [];
  }
}
// Implement retry logic for transient failures
/**
 * Runs a retrieval and retries with exponential backoff on transient failures.
 *
 * An error whose `name` is "ThrottlingException" or "ServiceException" is
 * retried after waiting 2^attempt seconds (2s, 4s, ...). Any other error, or a
 * transient error on the last allowed attempt, is rethrown unchanged.
 *
 * @param retriever - Retriever to query.
 * @param query - Search text passed to the retriever.
 * @param maxRetries - Maximum total number of attempts (default 3). Values
 *   below 1 and NaN are treated as 1 and fractional values are rounded down,
 *   so at least one attempt is always made and the final error is never
 *   silently dropped.
 * @returns The documents from the first successful attempt.
 * @throws The last error raised by the retriever when no attempt succeeds.
 */
async function retrieverWithRetry(retriever: AmazonKendraRetriever, query: string, maxRetries = 3) {
  const maxAttempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;
  // No loop condition: every iteration either returns, throws, or sleeps and
  // retries, so the function can never fall through and yield undefined.
  for (let attempt = 1; ; attempt++) {
    try {
      return await retriever.getRelevantDocuments(query);
    } catch (error: unknown) {
      const isTransient =
        error instanceof Error &&
        (error.name === "ThrottlingException" || error.name === "ServiceException");
      // Don't retry for non-transient errors or once the attempts are used up
      if (!isTransient || attempt >= maxAttempts) throw error;
      const delay = Math.pow(2, attempt) * 1000; // Exponential backoff
      console.log(`Retry attempt ${attempt} after ${delay}ms`);
      await new Promise(resolve => setTimeout(resolve, delay));
    }
  }
}
Best practices for optimizing retriever performance in production applications.
Strategies:
// Optimized retriever configuration
const optimizedRetriever = new AmazonKnowledgeBaseRetriever({
knowledgeBaseId: "your-kb-id",
topK: 5, // Smaller topK for faster responses
region: "us-east-1", // Use closest region
overrideSearchType: "HYBRID", // Often better than pure semantic
filter: {
// Pre-filter to relevant document types
in: {
key: "document_type",
value: ["manual", "guide", "faq"]
}
}
});
// Simple in-memory cache for frequent queries
const queryCache = new Map<string, Document[]>();
const CACHE_TTL = 5 * 60 * 1000; // 5 minutes
// Expiry time (epoch milliseconds) for each key stored in queryCache
const queryCacheExpiry = new Map<string, number>();

/**
 * Retrieves documents for a query, reusing a cached result while it is fresh.
 *
 * The cache key is the query lower-cased and trimmed. A cached result is
 * served only while its recorded expiry lies in the future. Expired entries
 * are removed on the next cache miss, so no timers are created and nothing
 * keeps the process alive after the last query.
 *
 * @param query - Search text; sent to the retriever unmodified on a miss.
 * @returns The cached or freshly retrieved documents.
 */
async function cachedRetrieval(query: string): Promise<Document[]> {
  const cacheKey = query.toLowerCase().trim();

  // Serve from cache only if the entry exists and has not expired
  const cached = queryCache.get(cacheKey);
  const expiresAt = queryCacheExpiry.get(cacheKey);
  if (cached !== undefined && expiresAt !== undefined && Date.now() < expiresAt) {
    return cached;
  }

  const documents = await optimizedRetriever.getRelevantDocuments(query);

  // Sweep stale entries before storing so one-off queries do not accumulate
  const storedAt = Date.now();
  for (const [key, expiry] of queryCacheExpiry) {
    if (expiry <= storedAt) {
      queryCache.delete(key);
      queryCacheExpiry.delete(key);
    }
  }

  queryCache.set(cacheKey, documents);
  queryCacheExpiry.set(cacheKey, storedAt + CACHE_TTL);
  return documents;
}
Install with Tessl CLI
npx tessl i tessl/npm-langchain--aws