or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

advanced

error-handling.md type-inference.md
glossary.md index.md quick-reference.md task-index.md
tile.json

documents.md docs/api-reference/

Document API Reference

Complete API reference for working with text documents in LangChain applications.

Document Class

/**
 * Document class for representing text documents with metadata.
 *
 * @typeParam Metadata - Shape of the metadata object. Constrained to
 *   `Record<string, any>` and defaults to `Record<string, any>` when no
 *   type argument is given.
 */
class Document<Metadata extends Record<string, any> = Record<string, any>> {
  /**
   * The text content of the document.
   */
  pageContent: string;

  /**
   * Arbitrary metadata about the document, typed by the `Metadata` parameter.
   *
   * NOTE(review): this property is required here while `DocumentInput.metadata`
   * is optional; the value used when it is omitted is not shown in this
   * reference — confirm against the implementation.
   */
  metadata: Metadata;

  /**
   * Create a new document.
   *
   * @param fields - Document initialization fields (see `DocumentInput`).
   */
  constructor(fields: DocumentInput<Metadata>);
}

/**
 * Initialization fields accepted by the `Document` constructor.
 *
 * @typeParam Metadata - Shape of the optional metadata object. Constrained to
 *   `Record<string, any>` and defaults to `Record<string, any>`.
 */
interface DocumentInput<Metadata extends Record<string, any> = Record<string, any>> {
  /**
   * The text content (required).
   */
  pageContent: string;

  /**
   * Optional metadata; the property may be omitted entirely.
   */
  metadata?: Metadata;
}

Usage Examples

Creating Documents

import { Document } from "langchain";

// Simple document: only pageContent is supplied; metadata is left out
// (it is declared optional on DocumentInput).
const doc1 = new Document({
  pageContent: "This is the content of the document.",
});

// Document with metadata: free-form key/value fields describing the content.
const doc2 = new Document({
  pageContent: "LangChain is a framework for building LLM applications.",
  metadata: {
    source: "documentation",
    author: "LangChain",
    date: "2024-01-01",
    category: "tutorial",
  },
});

// Typed metadata: declare the expected metadata shape as an interface.
interface ArticleMetadata {
  title: string;
  author: string;
  publishDate: string;
  tags: string[];
}

// Passing ArticleMetadata as the type argument makes the compiler check the
// metadata literal below against that interface.
const article = new Document<ArticleMetadata>({
  pageContent: "Article content here...",
  metadata: {
    title: "Introduction to LangChain",
    author: "Jane Doe",
    publishDate: "2024-01-15",
    tags: ["langchain", "ai", "llm"],
  },
});

Accessing Document Properties

import { Document } from "langchain";

// Build a document with text content and a single metadata field.
const doc = new Document({
  pageContent: "Content",
  metadata: { source: "file.txt" },
});

// pageContent and metadata are read directly as properties of the instance.
console.log(doc.pageContent); // "Content"
console.log(doc.metadata.source); // "file.txt"

Working with Document Collections

import { Document } from "langchain";

// Create multiple documents, each tagged with a numeric id and a category.
const documents: Document[] = [
  new Document({
    pageContent: "First document content",
    metadata: { id: 1, category: "A" },
  }),
  new Document({
    pageContent: "Second document content",
    metadata: { id: 2, category: "B" },
  }),
  new Document({
    pageContent: "Third document content",
    metadata: { id: 3, category: "A" },
  }),
];

// Filter documents: keep only those whose metadata category is "A".
const categoryA = documents.filter((doc) => doc.metadata.category === "A");

// Map over documents: character length of each document's content.
const contentLengths = documents.map((doc) => doc.pageContent.length);

// Sort documents by ascending numeric id. Array.prototype.sort reorders the
// array in place and returns that same array, so sort a shallow copy to keep
// `documents` in its original order.
const sortedDocs = [...documents].sort((a, b) => a.metadata.id - b.metadata.id);

Document Storage

import { Document } from "langchain";
import { createDocumentStoreFromByteStore } from "langchain/storage/encoder_backed";
// LocalFileStore is exported from the file_system entry point, not from
// encoder_backed.
import { LocalFileStore } from "langchain/storage/file_system";

// Create document store: a byte store opened at ./docs, wrapped so that it
// accepts and returns Document objects.
const fileStore = await LocalFileStore.fromPath("./docs");
const docStore = createDocumentStoreFromByteStore(fileStore);

// Store documents as [key, document] pairs.
await docStore.mset([
  [
    "doc:1",
    new Document({
      pageContent: "Document content",
      metadata: { title: "My Document" },
    }),
  ],
]);

// Retrieve documents: mget returns one entry per requested key, and the entry
// is undefined when the key is not present, so guard before dereferencing.
const [doc] = await docStore.mget(["doc:1"]);
if (doc === undefined) {
  throw new Error(`Document "doc:1" was not found in the store`);
}
console.log(doc.pageContent);
console.log(doc.metadata.title);

Documents from Files

import { Document } from "langchain";
import * as fs from "fs/promises";

/**
 * Reads a file as UTF-8 text and wraps the text in a Document.
 *
 * The resulting metadata records the path the text came from (`source`) and
 * an ISO-8601 timestamp taken after the read completes (`loadedAt`).
 *
 * @param filePath - Path of the file to read.
 * @returns A promise resolving to the Document built from the file's text.
 */
async function loadDocument(filePath: string): Promise<Document> {
  const pageContent = await fs.readFile(filePath, "utf-8");
  const metadata = {
    source: filePath,
    loadedAt: new Date().toISOString(),
  };

  return new Document({ pageContent, metadata });
}

const doc = await loadDocument("./article.txt");

Documents with Rich Metadata

import { Document } from "langchain";

// Metadata shape with two required provenance fields, optional descriptive
// and chunking fields, and an open-ended index signature.
interface RichMetadata {
  // Source information (required); sourceType is limited to three literals
  source: string;
  sourceType: "file" | "url" | "database";

  // Content metadata (all optional)
  title?: string;
  author?: string;
  publishDate?: string;

  // Processing metadata (all optional)
  chunkIndex?: number;
  totalChunks?: number;

  // Custom fields: the index signature admits any additional string key with
  // a value of any type, so these extra fields are not type-checked
  [key: string]: any;
}

// The declared fields are checked against RichMetadata; `customField` is
// accepted through the index signature.
const doc = new Document<RichMetadata>({
  pageContent: "Content...",
  metadata: {
    source: "https://example.com/article",
    sourceType: "url",
    title: "Article Title",
    author: "John Smith",
    publishDate: "2024-01-01",
    chunkIndex: 0,
    totalChunks: 5,
    customField: "custom value",
  },
});

Best Practices

Content

  • Keep pageContent as plain text
  • Store formatted content in metadata if needed
  • Split large documents into chunks
  • Normalize whitespace and encoding

Metadata

  • Use consistent metadata keys across documents
  • Include source information for traceability
  • Add timestamps for temporal tracking
  • Use typed metadata interfaces for type safety

Collections

  • Use meaningful document IDs in stores
  • Implement consistent naming schemes
  • Consider pagination for large collections
  • Index metadata fields for efficient querying

Storage

  • Choose appropriate storage backend
  • Implement proper error handling
  • Consider compression for large documents
  • Back up important document collections