or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agents-tools.md, documents-nodes.md, evaluation.md, index.md, indices.md, llms-embeddings.md, node-parsers.md, postprocessors.md, prompts.md, query-engines.md, retrievers.md, settings.md, storage.md

docs/documents-nodes.md

0

# Documents & Nodes

1

2

Core data structures for representing textual content, managing metadata, and organizing information in LlamaIndex applications. Documents serve as the primary input format, while nodes provide the fundamental unit for indexing and retrieval operations.

3

4

## Capabilities

5

6

### Document Creation & Management

7

8

Documents represent the primary input format for LlamaIndex, containing text content with optional metadata and supporting various content types including text and images.

9

10

```python { .api }

11

class Document:

12

"""

13

A document represents a piece of unstructured text with optional metadata.

14

15

Parameters:

16

- text: str, the main text content

17

- metadata: Optional[dict], key-value metadata pairs

18

- excluded_embed_metadata_keys: Optional[List[str]], metadata keys to exclude from embedding

19

- excluded_llm_metadata_keys: Optional[List[str]], metadata keys to exclude from LLM context

20

- relationships: Optional[Dict[NodeRelationship, RelatedNodeInfo]], relationships to other nodes

21

- mimetype: Optional[str], MIME type of the content

22

- start_char_idx: Optional[int], starting character index in source

23

- end_char_idx: Optional[int], ending character index in source

24

"""

25

def __init__(

26

self,

27

text: str,

28

metadata: Optional[dict] = None,

29

excluded_embed_metadata_keys: Optional[List[str]] = None,

30

excluded_llm_metadata_keys: Optional[List[str]] = None,

31

relationships: Optional[Dict[NodeRelationship, RelatedNodeInfo]] = None,

32

mimetype: Optional[str] = None,

33

start_char_idx: Optional[int] = None,

34

end_char_idx: Optional[int] = None,

35

**kwargs

36

): ...

37

38

def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:

39

"""Get text content with optional metadata inclusion."""

40

41

def set_content(self, value: str) -> None:

42

"""Set the text content."""

43

44

def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:

45

"""Get formatted metadata string."""

46

47

def get_doc_id(self) -> str:

48

"""Get document ID."""

49

50

def __str__(self) -> str: ...

51

```

52

53

### Image Documents

54

55

Specialized document type for handling image content with text descriptions and image-specific metadata.

56

57

```python { .api }

58

class ImageDocument(Document):

59

"""

60

Document containing image data with optional text description.

61

62

Parameters:

63

- text: str, text description of the image

64

- image: Optional[str], base64 encoded image data or image path

65

- image_path: Optional[str], path to image file

66

- image_url: Optional[str], URL to image

67

- metadata: Optional[dict], additional metadata

68

"""

69

def __init__(

70

self,

71

text: str = "",

72

image: Optional[str] = None,

73

image_path: Optional[str] = None,

74

image_url: Optional[str] = None,

75

metadata: Optional[dict] = None,

76

**kwargs

77

): ...

78

79

def resolve_image(self) -> str:

80

"""Resolve image to base64 encoded string."""

81

```

82

83

### Base Node Structure

84

85

Foundation class for all node types, providing core functionality for text content, metadata management, and relationship tracking.

86

87

```python { .api }

88

class BaseNode:

89

"""

90

Base class for all node types in LlamaIndex.

91

92

Parameters:

93

- id_: Optional[str], unique identifier for the node

94

- embedding: Optional[List[float]], vector embedding for the node

95

- metadata: Optional[dict], key-value metadata pairs

96

- excluded_embed_metadata_keys: Optional[List[str]], metadata keys excluded from embedding

97

- excluded_llm_metadata_keys: Optional[List[str]], metadata keys excluded from LLM context

98

- relationships: Optional[Dict[NodeRelationship, RelatedNodeInfo]], relationships to other nodes

99

- start_char_idx: Optional[int], starting character index in source

100

- end_char_idx: Optional[int], ending character index in source

101

"""

102

def __init__(

103

self,

104

id_: Optional[str] = None,

105

embedding: Optional[List[float]] = None,

106

metadata: Optional[dict] = None,

107

excluded_embed_metadata_keys: Optional[List[str]] = None,

108

excluded_llm_metadata_keys: Optional[List[str]] = None,

109

relationships: Optional[Dict[NodeRelationship, RelatedNodeInfo]] = None,

110

start_char_idx: Optional[int] = None,

111

end_char_idx: Optional[int] = None,

112

**kwargs

113

): ...

114

115

@property

116

def node_id(self) -> str:

117

"""Get node identifier."""

118

119

@node_id.setter

120

def node_id(self, node_id: str) -> None:

121

"""Set node identifier."""

122

123

def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:

124

"""Get node content with optional metadata."""

125

126

def get_metadata_str(self, mode: MetadataMode = MetadataMode.ALL) -> str:

127

"""Get formatted metadata string."""

128

129

def set_content(self, value: str) -> None:

130

"""Set node content."""

131

132

def get_embedding(self) -> List[float]:

133

"""Get node embedding vector."""

134

135

def as_related_node_info(self) -> RelatedNodeInfo:

136

"""Convert to RelatedNodeInfo for relationship tracking."""

137

```

138

139

### Text Nodes

140

141

Primary node type for text content, extending BaseNode with text-specific functionality and serving as the fundamental unit for most LlamaIndex operations.

142

143

```python { .api }

144

class TextNode(BaseNode):

145

"""

146

Node containing text content for indexing and retrieval.

147

148

Parameters:

149

- text: str, the text content of the node

150

- start_char_idx: Optional[int], starting character index in source document

151

- end_char_idx: Optional[int], ending character index in source document

152

- text_template: str, template for formatting text with metadata

153

- metadata_template: str, template for formatting metadata

154

- metadata_separator: str, separator between metadata items

155

"""

156

def __init__(

157

self,

158

text: str = "",

159

start_char_idx: Optional[int] = None,

160

end_char_idx: Optional[int] = None,

161

text_template: str = "{metadata_str}\n\n{content}",

162

metadata_template: str = "{key}: {value}",

163

metadata_separator: str = "\n",

164

**kwargs

165

): ...

166

167

@classmethod

168

def get_type(cls) -> str:

169

"""Get node type identifier."""

170

171

def get_text(self) -> str:

172

"""Get raw text content."""

173

174

def set_text(self, text: str) -> None:

175

"""Set text content."""

176

```

177

178

### Image Nodes

179

180

Specialized nodes for handling image content, extending TextNode with image-specific capabilities and metadata.

181

182

```python { .api }

183

class ImageNode(TextNode):

184

"""

185

Node containing image data with optional text description.

186

187

Parameters:

188

- text: str, text description of the image

189

- image: Optional[str], base64 encoded image data or image path

190

- image_path: Optional[str], path to image file

191

- image_url: Optional[str], URL to image

192

- image_mimetype: Optional[str], MIME type of image

193

- text_embedding: Optional[List[float]], embedding for text content

194

- image_embedding: Optional[List[float]], embedding for image content

195

"""

196

def __init__(

197

self,

198

text: str = "",

199

image: Optional[str] = None,

200

image_path: Optional[str] = None,

201

image_url: Optional[str] = None,

202

image_mimetype: Optional[str] = None,

203

text_embedding: Optional[List[float]] = None,

204

image_embedding: Optional[List[float]] = None,

205

**kwargs

206

): ...

207

208

def resolve_image(self) -> str:

209

"""Resolve image to base64 encoded string."""

210

211

def set_image(self, image: Optional[str]) -> None:

212

"""Set image data."""

213

```

214

215

### Index Reference Nodes

216

217

Nodes that reference other indices, enabling hierarchical and composable index structures for complex document organizations.

218

219

```python { .api }

220

class IndexNode(BaseNode):

221

"""

222

Node that references another index for hierarchical structures.

223

224

Parameters:

225

- text: str, text description of the referenced index

226

- index_id: Optional[str], identifier of the referenced index

227

- obj: Optional[BaseIndex], the referenced index object

228

"""

229

def __init__(

230

self,

231

text: str = "",

232

index_id: Optional[str] = None,

233

obj: Optional[BaseIndex] = None,

234

**kwargs

235

): ...

236

237

@classmethod

238

def from_text_node(cls, node: TextNode, index_id: str) -> "IndexNode":

239

"""Create IndexNode from TextNode."""

240

```

241

242

### Node Scoring & Ranking

243

244

Container for nodes with associated relevance scores, used throughout retrieval and ranking operations.

245

246

```python { .api }

247

class NodeWithScore:

248

"""

249

Container for a node with an associated relevance score.

250

251

Parameters:

252

- node: BaseNode, the node content

253

- score: Optional[float], relevance score (higher = more relevant)

254

"""

255

def __init__(self, node: BaseNode, score: Optional[float] = None): ...

256

257

@property

258

def node_id(self) -> str:

259

"""Get node identifier."""

260

261

@property

262

def text(self) -> str:

263

"""Get node text content."""

264

265

def __str__(self) -> str: ...

266

267

def get_content(self, metadata_mode: MetadataMode = MetadataMode.ALL) -> str:

268

"""Get node content with metadata."""

269

```

270

271

### Node Relationships & References

272

273

System for tracking relationships between nodes and managing references to related content.

274

275

```python { .api }

276

class RelatedNodeInfo:

277

"""

278

Information about a related node.

279

280

Parameters:

281

- node_id: str, identifier of the related node

282

- node_type: Optional[ObjectType], type of the related node

283

- metadata: Optional[dict], metadata about the relationship

284

- hash: Optional[str], hash of the related node content

285

"""

286

def __init__(

287

self,

288

node_id: str,

289

node_type: Optional[ObjectType] = None,

290

metadata: Optional[dict] = None,

291

hash: Optional[str] = None

292

): ...

293

294

class NodeRelationship(str, Enum):

295

"""Types of relationships between nodes."""

296

SOURCE = "SOURCE" # Source document relationship

297

PREVIOUS = "PREVIOUS" # Previous node in sequence

298

NEXT = "NEXT" # Next node in sequence

299

PARENT = "PARENT" # Parent node in hierarchy

300

CHILD = "CHILD" # Child node in hierarchy

301

```

302

303

### Media Resource Handling

304

305

Support for various media types and resource management in documents and nodes.

306

307

```python { .api }

308

class MediaResource:

309

"""

310

Resource for handling media content in documents.

311

312

Parameters:

313

- text: Optional[str], text description of the resource

314

- url: Optional[str], URL to the resource

315

- path: Optional[str], local path to the resource

316

- mimetype: Optional[str], MIME type of the resource

317

"""

318

def __init__(

319

self,

320

text: Optional[str] = None,

321

url: Optional[str] = None,

322

path: Optional[str] = None,

323

mimetype: Optional[str] = None

324

): ...

325

```

326

327

## Usage Examples

328

329

### Creating and Managing Documents

330

331

```python

332

from llama_index.core import Document

333

from llama_index.core.schema import MetadataMode

334

335

# Create a basic document

336

doc = Document(

337

text="LlamaIndex provides tools for building RAG applications with LLMs.",

338

metadata={

339

"source": "documentation",

340

"category": "technical",

341

"author": "LlamaIndex Team"

342

}

343

)

344

345

# Access document content

346

print(doc.get_content()) # Includes metadata by default

347

print(doc.get_content(metadata_mode=MetadataMode.NONE)) # Text only

348

349

# Update document

350

doc.set_content("Updated content about LlamaIndex capabilities.")

351

```

352

353

### Working with Text Nodes

354

355

```python

356

from llama_index.core.schema import TextNode, NodeRelationship, RelatedNodeInfo

357

358

# Create text nodes

359

node1 = TextNode(

360

text="Introduction to machine learning concepts.",

361

metadata={"chapter": "1", "topic": "intro"}

362

)

363

364

node2 = TextNode(

365

text="Deep learning architectures and applications.",

366

metadata={"chapter": "2", "topic": "deep_learning"}

367

)

368

369

# Establish relationships

370

node2.relationships[NodeRelationship.PREVIOUS] = node1.as_related_node_info()

371

node1.relationships[NodeRelationship.NEXT] = node2.as_related_node_info()

372

373

# Access node properties

374

print(f"Node ID: {node1.node_id}")

375

print(f"Text: {node1.get_text()}")

376

print(f"Content with metadata: {node1.get_content()}")

377

```

378

379

### Handling Scored Results

380

381

```python

382

from llama_index.core.schema import NodeWithScore

383

384

# Create scored nodes (typically from retrieval)

385

scored_nodes = [

386

NodeWithScore(node=node1, score=0.85),

387

NodeWithScore(node=node2, score=0.72)

388

]

389

390

# Process results

391

for scored_node in scored_nodes:

392

print(f"Score: {scored_node.score:.2f}")

393

print(f"Text: {scored_node.text}")

394

print(f"Node ID: {scored_node.node_id}")

395

```

396

397

## Types & Enums

398

399

```python { .api }

400

class ObjectType(str, Enum):

401

"""Types of objects in LlamaIndex."""

402

TEXT = "text"

403

IMAGE = "image"

404

INDEX = "index"

405

DOCUMENT = "document"

406

407

class MetadataMode(str, Enum):

408

"""Modes for including metadata in content."""

409

ALL = "all" # Include all metadata

410

EMBED = "embed" # Include only embedding metadata

411

LLM = "llm" # Include only LLM metadata

412

NONE = "none" # Include no metadata

413

414

class Modality(str, Enum):

415

"""Content modalities supported."""

416

TEXT = "text"

417

IMAGE = "image"

418

AUDIO = "audio"

419

VIDEO = "video"

420

```