or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

aggregations.md, analysis.md, connections.md, document-operations.md, field-types.md, index-management.md, index.md, search-queries.md

docs/document-operations.md

0

# Document Operations

1

2

The Document API provides an ORM-like mapping layer for Elasticsearch documents, with automatic index management, CRUD operations, bulk processing, and lifecycle hooks. The Document class bridges Python objects and Elasticsearch documents while maintaining type safety and providing convenient persistence methods.

3

4

## Capabilities

5

6

### Document Class Definition

7

8

Base class for creating Elasticsearch documents with field definitions, index configuration, and persistence methods.

9

10

```python { .api }

11

class Document:

12

"""

13

Base class for Elasticsearch documents.

14

15

Attributes are automatically converted to appropriate Field instances

16

based on their type annotations or assignments.

17

"""

18

def __init__(self, meta=None, **kwargs):

19

"""

20

Initialize document instance.

21

22

Args:

23

meta (dict, optional): Document metadata (id, index, etc.)

24

**kwargs: Field values for the document

25

"""

26

27

def save(self, using=None, index=None, validate=True, skip_empty=True, **kwargs):

28

"""

29

Save document to Elasticsearch.

30

31

Args:

32

using (str, optional): Connection alias to use

33

index (str, optional): Index name override

34

validate (bool): Whether to validate before saving

35

skip_empty (bool): Skip empty fields

36

**kwargs: Additional Elasticsearch index parameters

37

38

Returns:

39

bool: True if document was created, False if updated

40

"""

41

42

def delete(self, using=None, index=None, **kwargs):

43

"""

44

Delete document from Elasticsearch.

45

46

Args:

47

using (str, optional): Connection alias to use

48

index (str, optional): Index name override

49

**kwargs: Additional Elasticsearch delete parameters

50

51

Returns:

52

bool: True if document was deleted

53

"""

54

55

def update(self, using=None, index=None, detect_noop=True, **kwargs):

56

"""

57

Update document in Elasticsearch.

58

59

Args:

60

using (str, optional): Connection alias to use

61

index (str, optional): Index name override

62

detect_noop (bool): Detect if update is a no-op

63

**kwargs: Additional update parameters

64

65

Returns:

66

dict: Update response from Elasticsearch

67

"""

68

69

@classmethod

70

def get(cls, id, using=None, index=None, **kwargs):

71

"""

72

Retrieve document by ID.

73

74

Args:

75

id: Document ID

76

using (str, optional): Connection alias to use

77

index (str, optional): Index name override

78

**kwargs: Additional get parameters

79

80

Returns:

81

Document: Document instance

82

83

Raises:

84

NotFoundError: If document doesn't exist

85

"""

86

87

@classmethod

88

def mget(cls, docs, using=None, index=None, raise_on_error=True, **kwargs):

89

"""

90

Multi-get documents by IDs.

91

92

Args:

93

docs (list): List of document IDs or dicts with ID and other params

94

using (str, optional): Connection alias to use

95

index (str, optional): Index name override

96

raise_on_error (bool): Raise exception on missing documents

97

**kwargs: Additional mget parameters

98

99

Returns:

100

list: List of Document instances (None for missing docs if not raising)

101

"""

102

103

@classmethod

104

def search(cls, using=None, index=None):

105

"""

106

Create Search instance for this document type.

107

108

Args:

109

using (str, optional): Connection alias to use

110

index (str, optional): Index name override

111

112

Returns:

113

Search: Search instance configured for this document type

114

"""

115

116

@classmethod

117

def init(cls, index=None, using=None, **kwargs):

118

"""

119

Create index and put mapping for this document.

120

121

Args:

122

index (str, optional): Index name override

123

using (str, optional): Connection alias to use

124

**kwargs: Additional index creation parameters

125

"""

126

127

def to_dict(self, include_meta=False, skip_empty=True):

128

"""

129

Convert document to dictionary.

130

131

Args:

132

include_meta (bool): Include document metadata

133

skip_empty (bool): Skip empty fields

134

135

Returns:

136

dict: Document as dictionary

137

"""

138

139

@classmethod

140

def from_dict(cls, d):

141

"""

142

Create document instance from dictionary.

143

144

Args:

145

d (dict): Dictionary with document data

146

147

Returns:

148

Document: Document instance

149

"""

150

```

151

152

### Async Document Operations

153

154

Asynchronous version of Document class for async/await operations.

155

156

```python { .api }

157

class AsyncDocument:

158

"""

159

Async version of Document class for async/await operations.

160

"""

161

162

async def save(self, using=None, index=None, validate=True, skip_empty=True, **kwargs):

163

"""

164

Async save document to Elasticsearch.

165

166

Args:

167

using (str, optional): Connection alias to use

168

index (str, optional): Index name override

169

validate (bool): Whether to validate before saving

170

skip_empty (bool): Skip empty fields

171

**kwargs: Additional Elasticsearch index parameters

172

173

Returns:

174

bool: True if document was created, False if updated

175

"""

176

177

async def delete(self, using=None, index=None, **kwargs):

178

"""

179

Async delete document from Elasticsearch.

180

181

Args:

182

using (str, optional): Connection alias to use

183

index (str, optional): Index name override

184

**kwargs: Additional Elasticsearch delete parameters

185

186

Returns:

187

bool: True if document was deleted

188

"""

189

190

async def update(self, using=None, index=None, detect_noop=True, **kwargs):

191

"""

192

Async update document in Elasticsearch.

193

194

Args:

195

using (str, optional): Connection alias to use

196

index (str, optional): Index name override

197

detect_noop (bool): Detect if update is a no-op

198

**kwargs: Additional update parameters

199

200

Returns:

201

dict: Update response from Elasticsearch

202

"""

203

204

@classmethod

205

async def get(cls, id, using=None, index=None, **kwargs):

206

"""

207

Async retrieve document by ID.

208

209

Args:

210

id: Document ID

211

using (str, optional): Connection alias to use

212

index (str, optional): Index name override

213

**kwargs: Additional get parameters

214

215

Returns:

216

AsyncDocument: Document instance

217

218

Raises:

219

NotFoundError: If document doesn't exist

220

"""

221

222

@classmethod

223

async def mget(cls, docs, using=None, index=None, raise_on_error=True, **kwargs):

224

"""

225

Async multi-get documents by IDs.

226

227

Args:

228

docs (list): List of document IDs or dicts with ID and other params

229

using (str, optional): Connection alias to use

230

index (str, optional): Index name override

231

raise_on_error (bool): Raise exception on missing documents

232

**kwargs: Additional mget parameters

233

234

Returns:

235

list: List of AsyncDocument instances

236

"""

237

238

@classmethod

239

async def init(cls, index=None, using=None, **kwargs):

240

"""

241

Async create index and put mapping for this document.

242

243

Args:

244

index (str, optional): Index name override

245

using (str, optional): Connection alias to use

246

**kwargs: Additional index creation parameters

247

"""

248

```

249

250

### Inner Document Definition

251

252

Base class for defining object and nested field structures that are embedded within other documents.

253

254

```python { .api }

255

class InnerDoc:

256

"""

257

Base class for nested document definitions.

258

259

Used to define object and nested field structures within documents.

260

"""

261

262

def __init__(self, **kwargs):

263

"""

264

Initialize inner document.

265

266

Args:

267

**kwargs: Field values for the inner document

268

"""

269

270

def to_dict(self, skip_empty=True):

271

"""

272

Convert inner document to dictionary.

273

274

Args:

275

skip_empty (bool): Skip empty fields

276

277

Returns:

278

dict: Inner document as dictionary

279

"""

280

```

281

282

### Index Configuration

283

284

Configure index settings and mappings within Document classes.

285

286

```python { .api }

287

class Index:

288

"""

289

Index configuration class used within Document definitions.

290

291

Example:

292

class MyDoc(Document):

293

title = Text()

294

295

class Index:

296

name = 'my_index'

297

settings = {

298

'number_of_shards': 2,

299

'number_of_replicas': 1

300

}

301

"""

302

name: str # Index name

303

settings: dict # Index settings

304

aliases: dict # Index aliases

305

```

306

307

### Meta Information

308

309

Document metadata handling for ID, index, routing, and other Elasticsearch document properties.

310

311

```python { .api }

312

class Meta:

313

"""

314

Document metadata container.

315

316

Accessible via document.meta property.

317

"""

318

id: str # Document ID

319

index: str # Document index

320

doc_type: str # Document type (deprecated in ES 7+)

321

routing: str # Document routing

322

parent: str # Parent document ID (for parent-child)

323

version: int # Document version

324

seq_no: int # Sequence number

325

primary_term: int # Primary term

326

score: float # Search score (when from search results)

327

```

328

329

### Update by Query Operations

330

331

Update multiple documents matching a query.

332

333

```python { .api }

334

class UpdateByQuery:

335

"""

336

Update documents matching a query.

337

"""

338

339

def __init__(self, using=None, index=None):

340

"""

341

Initialize update by query operation.

342

343

Args:

344

using (str, optional): Connection alias to use

345

index (str or list, optional): Index name(s) to update

346

"""

347

348

def script(self, **kwargs):

349

"""

350

Set update script.

351

352

Args:

353

**kwargs: Script parameters

354

355

Returns:

356

UpdateByQuery: Current instance with script applied

357

"""

358

359

def query(self, query, **kwargs):

360

"""

361

Set query to match documents for update.

362

363

Args:

364

query (str or Query): Query to match documents

365

**kwargs: Query parameters if query is a string

366

367

Returns:

368

UpdateByQuery: Current instance with query applied

369

"""

370

371

def filter(self, query, **kwargs):

372

"""

373

Add filter to update by query.

374

375

Args:

376

query (str or Query): Filter query

377

**kwargs: Filter parameters if query is a string

378

379

Returns:

380

UpdateByQuery: Current instance with filter applied

381

"""

382

383

def execute(self):

384

"""

385

Execute update by query operation.

386

387

Returns:

388

dict: Update by query response with statistics

389

"""

390

391

def params(self, **kwargs):

392

"""

393

Set update by query parameters.

394

395

Args:

396

**kwargs: Update parameters

397

398

Parameters:

399

conflicts (str): How to handle conflicts ('abort' or 'proceed')

400

refresh (bool or str): Refresh policy

401

timeout (str): Operation timeout

402

wait_for_active_shards (str): Wait for active shards

403

wait_for_completion (bool): Wait for completion

404

requests_per_second (int): Throttling rate

405

scroll_size (int): Scroll batch size

406

pipeline (str): Ingest pipeline to use

407

408

Returns:

409

UpdateByQuery: Current instance with parameters applied

410

"""

411

412

class AsyncUpdateByQuery:

413

"""

414

Async version of UpdateByQuery for async/await operations.

415

"""

416

417

def __init__(self, using=None, index=None):

418

"""Initialize async update by query operation."""

419

420

def script(self, **kwargs):

421

"""Set update script (same as UpdateByQuery)."""

422

423

def query(self, query, **kwargs):

424

"""Set query to match documents (same as UpdateByQuery)."""

425

426

def filter(self, query, **kwargs):

427

"""Add filter (same as UpdateByQuery)."""

428

429

def params(self, **kwargs):

430

"""Set parameters (same as UpdateByQuery)."""

431

432

async def execute(self):

433

"""

434

Async execute update by query operation.

435

436

Returns:

437

dict: Update by query response with statistics

438

"""

439

```

440

441

### Delete by Query Operations

442

443

Delete multiple documents matching a query.

444

445

```python { .api }

446

class DeleteByQuery:

447

"""

448

Delete documents matching a query.

449

"""

450

451

def __init__(self, using=None, index=None):

452

"""

453

Initialize delete by query operation.

454

455

Args:

456

using (str, optional): Connection alias to use

457

index (str or list, optional): Index name(s) to delete from

458

"""

459

460

def query(self, query, **kwargs):

461

"""

462

Set query to match documents for deletion.

463

464

Args:

465

query (str or Query): Query to match documents

466

**kwargs: Query parameters if query is a string

467

468

Returns:

469

DeleteByQuery: Current instance with query applied

470

"""

471

472

def filter(self, query, **kwargs):

473

"""

474

Add filter to delete by query.

475

476

Args:

477

query (str or Query): Filter query

478

**kwargs: Filter parameters if query is a string

479

480

Returns:

481

DeleteByQuery: Current instance with filter applied

482

"""

483

484

def execute(self):

485

"""

486

Execute delete by query operation.

487

488

Returns:

489

dict: Delete by query response with statistics

490

"""

491

492

def params(self, **kwargs):

493

"""

494

Set delete by query parameters.

495

496

Args:

497

**kwargs: Delete parameters

498

499

Parameters:

500

conflicts (str): How to handle conflicts ('abort' or 'proceed')

501

refresh (bool or str): Refresh policy

502

timeout (str): Operation timeout

503

wait_for_active_shards (str): Wait for active shards

504

wait_for_completion (bool): Wait for completion

505

requests_per_second (int): Throttling rate

506

scroll_size (int): Scroll batch size

507

508

Returns:

509

DeleteByQuery: Current instance with parameters applied

510

"""

511

512

class AsyncDeleteByQuery:

513

"""

514

Async version of DeleteByQuery for async/await operations.

515

"""

516

517

def __init__(self, using=None, index=None):

518

"""Initialize async delete by query operation."""

519

520

def query(self, query, **kwargs):

521

"""Set query to match documents (same as DeleteByQuery)."""

522

523

def filter(self, query, **kwargs):

524

"""Add filter (same as DeleteByQuery)."""

525

526

def params(self, **kwargs):

527

"""Set parameters (same as DeleteByQuery)."""

528

529

async def execute(self):

530

"""

531

Async execute delete by query operation.

532

533

Returns:

534

dict: Delete by query response with statistics

535

"""

536

```

537

538

### Reindex Operations

539

540

Reindex documents from a source index to a destination index.

541

542

```python { .api }

543

class Reindex:

544

"""

545

Reindex documents from source to destination.

546

"""

547

548

def __init__(self, using=None):

549

"""

550

Initialize reindex operation.

551

552

Args:

553

using (str, optional): Connection alias to use

554

"""

555

556

def source(self, **kwargs):

557

"""

558

Configure source for reindex operation.

559

560

Args:

561

**kwargs: Source configuration

562

563

Parameters:

564

index (str or list): Source index name(s)

565

query (dict): Query to filter source documents

566

sort (list): Sort order for source documents

567

_source (list or dict): Source field filtering

568

size (int): Batch size for reindexing

569

570

Returns:

571

Reindex: Current instance with source configured

572

"""

573

574

def dest(self, **kwargs):

575

"""

576

Configure destination for reindex operation.

577

578

Args:

579

**kwargs: Destination configuration

580

581

Parameters:

582

index (str): Destination index name

583

type (str): Destination document type (deprecated)

584

routing (str): Routing for destination documents

585

op_type (str): Operation type ('index' or 'create')

586

version_type (str): Version type for conflicts

587

pipeline (str): Ingest pipeline to use

588

589

Returns:

590

Reindex: Current instance with destination configured

591

"""

592

593

def script(self, **kwargs):

594

"""

595

Set reindex script for document transformation.

596

597

Args:

598

**kwargs: Script configuration

599

600

Returns:

601

Reindex: Current instance with script applied

602

"""

603

604

def execute(self):

605

"""

606

Execute reindex operation.

607

608

Returns:

609

dict: Reindex response with statistics

610

"""

611

612

def params(self, **kwargs):

613

"""

614

Set reindex parameters.

615

616

Args:

617

**kwargs: Reindex parameters

618

619

Parameters:

620

conflicts (str): How to handle conflicts ('abort' or 'proceed')

621

refresh (bool or str): Refresh policy

622

timeout (str): Operation timeout

623

wait_for_active_shards (str): Wait for active shards

624

wait_for_completion (bool): Wait for completion

625

requests_per_second (int): Throttling rate

626

627

Returns:

628

Reindex: Current instance with parameters applied

629

"""

630

631

class AsyncReindex:

632

"""

633

Async version of Reindex for async/await operations.

634

"""

635

636

def __init__(self, using=None):

637

"""Initialize async reindex operation."""

638

639

def source(self, **kwargs):

640

"""Configure source (same as Reindex)."""

641

642

def dest(self, **kwargs):

643

"""Configure destination (same as Reindex)."""

644

645

def script(self, **kwargs):

646

"""Set script (same as Reindex)."""

647

648

def params(self, **kwargs):

649

"""Set parameters (same as Reindex)."""

650

651

async def execute(self):

652

"""

653

Async execute reindex operation.

654

655

Returns:

656

dict: Reindex response with statistics

657

"""

658

```

659

660

## Usage Examples

661

662

### Basic Document Definition and Operations

663

664

```python

665

from elasticsearch_dsl import Document, Text, Keyword, Date, Integer, connections

666

667

# Configure connection

668

connections.create_connection(hosts=['http://localhost:9200'])

669

670

class BlogPost(Document):

671

title = Text(analyzer='snowball')

672

content = Text()

673

author = Keyword()

674

published = Date()

675

views = Integer()

676

677

class Index:

678

name = 'blog'

679

settings = {

680

'number_of_shards': 2,

681

}

682

683

# Create index and mapping

684

BlogPost.init()

685

686

# Create and save document

687

post = BlogPost(

688

title='My First Post',

689

content='This is the content of my first blog post...',

690

author='john_doe',

691

published='2023-10-01T10:30:00',

692

views=0

693

)

694

post.save()

695

696

# Retrieve document

697

retrieved_post = BlogPost.get(id=post.meta.id)

698

print(f"Post: {retrieved_post.title} by {retrieved_post.author}")

699

700

# Update document

701

retrieved_post.views = 10

702

retrieved_post.save()

703

704

# Delete document

705

retrieved_post.delete()

706

```

707

708

### Nested and Object Fields

709

710

```python

711

from elasticsearch_dsl import Document, Text, Keyword, Date, Object, Nested, InnerDoc

712

713

class Address(InnerDoc):

714

street = Text()

715

city = Text()

716

country = Keyword()

717

718

class Comment(InnerDoc):

719

author = Keyword()

720

content = Text()

721

timestamp = Date()

722

723

class User(Document):

724

name = Text()

725

email = Keyword()

726

address = Object(Address) # Single embedded object (Object field, not Nested)

727

comments = Nested(Comment) # Array of nested objects

728

729

class Index:

730

name = 'users'

731

732

# Create user with nested data

733

user = User(

734

name='John Doe',

735

email='john@example.com',

736

address=Address(

737

street='123 Main St',

738

city='New York',

739

country='USA'

740

),

741

comments=[

742

Comment(

743

author='friend1',

744

content='Great profile!',

745

timestamp='2023-10-01T12:00:00'

746

)

747

]

748

)

749

user.save()

750

```

751

752

### Bulk Operations

753

754

```python

755

from elasticsearch_dsl import Document, Text, connections

756

from elasticsearch.helpers import bulk

757

758

class Article(Document):

759

title = Text()

760

content = Text()

761

762

class Index:

763

name = 'articles'

764

765

# Bulk create documents

766

articles = [

767

Article(title=f'Article {i}', content=f'Content for article {i}')

768

for i in range(100)

769

]

770

771

# Bulk save using elasticsearch-py helper

772

actions = [

773

article.to_dict(include_meta=True)

774

for article in articles

775

]

776

bulk(connections.get_connection(), actions)

777

```