docs
Complete API reference for working with text documents in LangChain applications.
/**
 * A text document: a string of content paired with a metadata object.
 *
 * @typeParam Metadata - Shape of the metadata object. Defaults to an
 *   open-ended record keyed by string.
 */
class Document<Metadata extends Record<string, any> = Record<string, any>> {
  /** Text content carried by the document. */
  pageContent: string;

  /** Caller-defined metadata attached to the document. */
  metadata: Metadata;

  /**
   * Builds a document from the supplied fields.
   *
   * @param fields - Values used to initialize the document.
   */
  constructor(fields: DocumentInput<Metadata>);
}
/**
 * Fields accepted by the `Document` constructor.
 *
 * @typeParam Metadata - Shape of the metadata object. Defaults to an
 *   open-ended record keyed by string.
 */
interface DocumentInput<Metadata extends Record<string, any> = Record<string, any>> {
  /** Text content of the document. */
  pageContent: string;

  /** Metadata to attach to the document; may be omitted. */
  metadata?: Metadata;
}

import { Document } from "langchain";
// Minimal document: only the text content is supplied
const doc1 = new Document({
  pageContent: "This is the content of the document.",
});

// Document carrying free-form metadata alongside its content
const doc2 = new Document({
  pageContent: "LangChain is a framework for building LLM applications.",
  metadata: {
    source: "documentation",
    author: "LangChain",
    date: "2024-01-01",
    category: "tutorial",
  },
});

/** Metadata shape used to type-check the article document below. */
interface ArticleMetadata {
  title: string;
  author: string;
  publishDate: string;
  tags: string[];
}

// Passing the interface as the type argument makes the compiler check
// the metadata object against ArticleMetadata
const article = new Document<ArticleMetadata>({
  pageContent: "Article content here...",
  metadata: {
    title: "Introduction to LangChain",
    author: "Jane Doe",
    publishDate: "2024-01-15",
    tags: ["langchain", "ai", "llm"],
  },
});

import { Document } from "langchain";
// Build a document, then read its two fields back
const doc = new Document({
  pageContent: "Content",
  metadata: { source: "file.txt" },
});

console.log(doc.pageContent); // "Content"
console.log(doc.metadata.source); // "file.txt"

import { Document } from "langchain";
// Create multiple documents
const documents: Document[] = [
  new Document({
    pageContent: "First document content",
    metadata: { id: 1, category: "A" },
  }),
  new Document({
    pageContent: "Second document content",
    metadata: { id: 2, category: "B" },
  }),
  new Document({
    pageContent: "Third document content",
    metadata: { id: 3, category: "A" },
  }),
];

// Filter documents by a metadata field
const categoryA = documents.filter((doc) => doc.metadata.category === "A");

// Map each document to the length of its content
const contentLengths = documents.map((doc) => doc.pageContent.length);

// Sort documents by id. Array.prototype.sort reorders the array it is called
// on and returns that same array, so sort a shallow copy to leave `documents`
// in its original order.
const sortedDocs = [...documents].sort((a, b) => a.metadata.id - b.metadata.id);

import { Document } from "langchain";
import { createDocumentStoreFromByteStore } from "langchain/storage/encoder_backed";
import { LocalFileStore } from "langchain/storage/file_system";

// Create a document store layered on a file-backed byte store
const fileStore = await LocalFileStore.fromPath("./docs");
const docStore = createDocumentStoreFromByteStore(fileStore);

// Store documents as [key, document] pairs
await docStore.mset([
  [
    "doc:1",
    new Document({
      pageContent: "Document content",
      metadata: { title: "My Document" },
    }),
  ],
]);

// Retrieve documents. mget returns one entry per requested key, and an entry
// is undefined when nothing is stored under that key, so check before use.
const [doc] = await docStore.mget(["doc:1"]);
if (doc !== undefined) {
  console.log(doc.pageContent);
  console.log(doc.metadata.title);
}

import { Document } from "langchain";
import * as fs from "fs/promises";
/**
 * Reads a UTF-8 text file and wraps its contents in a Document.
 *
 * The returned document's metadata records the path it was read from
 * (`source`) and an ISO-8601 timestamp taken after the read completes
 * (`loadedAt`).
 *
 * @param filePath - Path of the file to read.
 * @returns A document whose `pageContent` is the file's text.
 */
async function loadDocument(filePath: string): Promise<Document> {
  const pageContent = await fs.readFile(filePath, "utf-8");
  const metadata = {
    source: filePath,
    loadedAt: new Date().toISOString(),
  };
  return new Document({ pageContent, metadata });
}

const doc = await loadDocument("./article.txt");

import { Document } from "langchain";
/**
 * Example metadata shape that mixes required source fields, optional
 * descriptive and processing fields, and an index signature that admits
 * arbitrary extra keys.
 */
interface RichMetadata {
  // --- Source information ---
  source: string;
  sourceType: "file" | "url" | "database";

  // --- Content metadata ---
  title?: string;
  author?: string;
  publishDate?: string;

  // --- Processing metadata ---
  chunkIndex?: number;
  totalChunks?: number;

  // --- Custom fields: any other string key is accepted ---
  [key: string]: any;
}

// `customField` is not declared on RichMetadata; the index signature is
// what allows it here
const doc = new Document<RichMetadata>({
  pageContent: "Content...",
  metadata: {
    source: "https://example.com/article",
    sourceType: "url",
    title: "Article Title",
    author: "John Smith",
    publishDate: "2024-01-01",
    chunkIndex: 0,
    totalChunks: 5,
    customField: "custom value",
  },
});