# Document Processing

This page describes the document representation and processing capabilities used for content management in LangChain applications. Documents are the fundamental units of text data used throughout the framework.

## Capabilities

### Document

The `Document` class is the core document representation: it holds the text content (`pageContent`), its associated `metadata`, and an optional `id`.

```typescript { .api }
/**
 * Core document representation
 * @template Metadata - Type of metadata object
 */
class Document<Metadata = Record<string, unknown>> {
  /** Main text content of the document */
  pageContent: string;
  /** Associated metadata */
  metadata: Metadata;
  /** Optional unique identifier */
  id?: string;

  constructor(fields: DocumentInput<Metadata>);

  /** Convert to JSON representation */
  toJSON(): Serialized;

  /** Create document from JSON */
  static fromJSON(json: Serialized): Document;
}
```

**Usage Examples:**

```typescript
import { Document } from "@langchain/core/documents";

// Simple document
const doc1 = new Document({
  pageContent: "LangChain is a framework for building applications with LLMs.",
  metadata: {
    source: "documentation",
    category: "introduction"
  }
});

// Document with custom metadata type
interface ArticleMetadata {
  title: string;
  author: string;
  publishDate: Date;
  tags: string[];
}

const article = new Document<ArticleMetadata>({
  pageContent: "This is the article content...",
  metadata: {
    title: "Understanding LangChain",
    author: "John Doe",
    publishDate: new Date("2024-01-15"),
    tags: ["langchain", "llm", "tutorial"]
  },
  id: "article-123"
});

console.log(article.pageContent); // "This is the article content..."
console.log(article.metadata.title); // "Understanding LangChain"
```

## Types

```typescript { .api }
interface DocumentInput<Metadata = Record<string, unknown>> {
  /** Main text content */
  pageContent: string;
  /** Document metadata */
  metadata?: Metadata;
  /** Optional document identifier */
  id?: string;
}

interface DocumentInterface<Metadata = Record<string, unknown>> {
  pageContent: string;
  metadata: Metadata;
  id?: string;
}
```