or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

async-operations.md authentication.md core-client.md document-modeling.md dsl-queries.md helper-functions.md index.md namespaced-apis.md plugin-apis.md

docs/document-modeling.md

0

# Document Modeling

1

2

The document modeling API provides object-relational mapping (ORM) style modeling for OpenSearch: field definitions, automatic mapping generation, and validation for structured data. The DSL offers a Pythonic way to define document schemas and interact with OpenSearch indices.

3

4

## Capabilities

5

6

### Document Base Class

7

8

Base class for creating document models with ORM-like functionality.

9

10

```python { .api }

11

class Document:

12

def __init__(self, **kwargs):

13

"""

14

Initialize document instance with field values.

15

16

Parameters:

17

- **kwargs: Field values for the document

18

"""

19

20

def save(self, using=None, index=None, **kwargs):

21

"""

22

Save document to OpenSearch.

23

24

Parameters:

25

- using: OpenSearch client instance

26

- index (str, optional): Index name (uses the name from the document's inner Index class if not provided)

27

- refresh (str/bool, optional): Refresh policy

28

- routing (str, optional): Routing value

29

- pipeline (str, optional): Ingest pipeline

30

31

Returns:

32

Document instance with updated meta information

33

"""

34

35

def update(self, using=None, index=None, **kwargs):

36

"""

37

Update document in OpenSearch.

38

39

Parameters:

40

- using: OpenSearch client instance

41

- index (str, optional): Index name

42

- refresh (str/bool, optional): Refresh policy

43

- retry_on_conflict (int, optional): Retry on version conflict

44

45

Returns:

46

Updated document instance

47

"""

48

49

def delete(self, using=None, index=None, **kwargs):

50

"""

51

Delete document from OpenSearch.

52

53

Parameters:

54

- using: OpenSearch client instance

55

- index (str, optional): Index name

56

- refresh (str/bool, optional): Refresh policy

57

58

Returns:

59

Deletion response

60

"""

61

62

@classmethod

63

def get(cls, id, using=None, index=None, **kwargs):

64

"""

65

Retrieve document by ID.

66

67

Parameters:

68

- id: Document ID

69

- using: OpenSearch client instance

70

- index (str, optional): Index name

71

72

Returns:

73

Document instance

74

75

Raises:

76

NotFoundError: If document doesn't exist

77

"""

78

79

@classmethod

80

def mget(cls, docs, using=None, index=None, **kwargs):

81

"""

82

Retrieve multiple documents by ID.

83

84

Parameters:

85

- docs: List of document IDs or dicts with id/index

86

- using: OpenSearch client instance

87

- index (str, optional): Default index name

88

89

Returns:

90

List of document instances

91

"""

92

93

@classmethod

94

def search(cls, using=None, index=None):

95

"""

96

Get Search object for this document type.

97

98

Parameters:

99

- using: OpenSearch client instance

100

- index (str, optional): Index name

101

102

Returns:

103

Search instance configured for this document type

104

"""

105

106

@classmethod

107

def exists(cls, id, using=None, index=None, **kwargs):

108

"""

109

Check if document exists.

110

111

Parameters:

112

- id: Document ID

113

- using: OpenSearch client instance

114

- index (str, optional): Index name

115

116

Returns:

117

bool: True if document exists

118

"""

119

120

def to_dict(self, include_meta=False, skip_empty=True):

121

"""

122

Convert document to dictionary.

123

124

Parameters:

125

- include_meta (bool): Include metadata fields

126

- skip_empty (bool): Skip fields with empty values

127

128

Returns:

129

dict: Document as dictionary

130

"""

131

132

@classmethod

133

def from_dict(cls, d):

134

"""

135

Create document instance from dictionary.

136

137

Parameters:

138

- d (dict): Document data

139

140

Returns:

141

Document instance

142

"""

143

```

144

145

### Inner Document Class

146

147

For modeling nested objects within documents.

148

149

```python { .api }

150

class InnerDoc:

151

def __init__(self, **kwargs):

152

"""

153

Initialize inner document with field values.

154

155

Parameters:

156

- **kwargs: Field values for the inner document

157

"""

158

159

def to_dict(self, skip_empty=True):

160

"""

161

Convert inner document to dictionary.

162

163

Parameters:

164

- skip_empty (bool): Skip fields with empty values

165

166

Returns:

167

dict: Inner document as dictionary

168

"""

169

170

@classmethod

171

def from_dict(cls, d):

172

"""

173

Create inner document from dictionary.

174

175

Parameters:

176

- d (dict): Inner document data

177

178

Returns:

179

InnerDoc instance

180

"""

181

```

182

183

### Mapping Management

184

185

Define and manage index mappings programmatically.

186

187

```python { .api }

188

class Mapping:

189

def __init__(self):

190

"""Initialize empty mapping."""

191

192

def field(self, name, field_type, **kwargs):

193

"""

194

Add field to mapping.

195

196

Parameters:

197

- name (str): Field name

198

- field_type (str/Field): Field type or Field instance

199

- **kwargs: Field parameters

200

201

Returns:

202

self (for chaining)

203

"""

204

205

def meta(self, name, **kwargs):

206

"""

207

Add metadata field to mapping.

208

209

Parameters:

210

- name (str): Meta field name

211

- **kwargs: Meta field parameters

212

213

Returns:

214

self (for chaining)

215

"""

216

217

def save(self, index, using=None, **kwargs):

218

"""

219

Save mapping to OpenSearch index.

220

221

Parameters:

222

- index (str): Index name

223

- using: OpenSearch client instance

224

- **kwargs: Additional mapping parameters

225

226

Returns:

227

Mapping creation response

228

"""

229

230

def update_from_opensearch(self, index, using=None):

231

"""

232

Update mapping from existing OpenSearch index.

233

234

Parameters:

235

- index (str): Index name

236

- using: OpenSearch client instance

237

238

Returns:

239

self

240

"""

241

242

def to_dict(self):

243

"""

244

Convert mapping to dictionary.

245

246

Returns:

247

dict: Mapping as dictionary

248

"""

249

```

250

251

### Index Management

252

253

Manage OpenSearch indices with settings and mappings.

254

255

```python { .api }

256

class Index:

257

def __init__(self, name, using=None):

258

"""

259

Initialize index manager.

260

261

Parameters:

262

- name (str): Index name

263

- using: OpenSearch client instance

264

"""

265

266

def settings(self, **kwargs):

267

"""

268

Set index settings.

269

270

Parameters:

271

- **kwargs: Index settings

272

273

Returns:

274

self (for chaining)

275

"""

276

277

def mapping(self, mapping):

278

"""

279

Set index mapping.

280

281

Parameters:

282

- mapping (Mapping): Mapping instance

283

284

Returns:

285

self (for chaining)

286

"""

287

288

def doc_type(self, document):

289

"""

290

Register document type with index.

291

292

Parameters:

293

- document (Document): Document class

294

295

Returns:

296

self (for chaining)

297

"""

298

299

def analyzer(self, name, **kwargs):

300

"""

301

Add custom analyzer to index.

302

303

Parameters:

304

- name (str): Analyzer name

305

- **kwargs: Analyzer configuration

306

307

Returns:

308

self (for chaining)

309

"""

310

311

def create(self, **kwargs):

312

"""

313

Create the index in OpenSearch.

314

315

Parameters:

316

- **kwargs: Index creation parameters

317

318

Returns:

319

Index creation response

320

"""

321

322

def delete(self, **kwargs):

323

"""

324

Delete the index from OpenSearch.

325

326

Parameters:

327

- **kwargs: Index deletion parameters

328

329

Returns:

330

Index deletion response

331

"""

332

333

def exists(self):

334

"""

335

Check if index exists.

336

337

Returns:

338

bool: True if index exists

339

"""

340

341

def open(self, **kwargs):

342

"""

343

Open the index.

344

345

Parameters:

346

- **kwargs: Index open parameters

347

348

Returns:

349

Index open response

350

"""

351

352

def close(self, **kwargs):

353

"""

354

Close the index.

355

356

Parameters:

357

- **kwargs: Index close parameters

358

359

Returns:

360

Index close response

361

"""

362

```

363

364

## Usage Examples

365

366

### Basic Document Model

367

368

```python

369

from opensearchpy import Document, Text, Keyword, Integer, Date

370

from datetime import datetime

371

372

class Article(Document):

373

title = Text(analyzer='standard')

374

content = Text()

375

author = Keyword()

376

category = Keyword()

377

published_date = Date()

378

view_count = Integer()

379

tags = Keyword(multi=True)

380

381

class Index:

382

name = 'articles'

383

settings = {

384

'number_of_shards': 1,

385

'number_of_replicas': 0

386

}

387

388

class Meta:

389

doc_type = '_doc'

390

391

# Create and save document

392

article = Article(

393

title='Introduction to OpenSearch',

394

content='OpenSearch is a powerful search and analytics engine...',

395

author='john_doe',

396

category='technology',

397

published_date=datetime.now(),

398

view_count=0,

399

tags=['search', 'analytics', 'opensource']

400

)

401

402

# Save to OpenSearch

403

article.meta.id = 'article-1'

404

article.save(using=client)

405

406

print(f"Article saved with ID: {article.meta.id}")

407

```

408

409

### Nested Document Modeling

410

411

```python

412

from opensearchpy import Document, InnerDoc, Nested, Text, Keyword, Integer, Date

413

414

class Comment(InnerDoc):

415

author = Keyword()

416

content = Text()

417

created_date = Date()

418

rating = Integer()

419

420

class Product(Document):

421

name = Text()

422

description = Text()

423

category = Keyword()

424

price = Integer()

425

comments = Nested(Comment)

426

427

class Index:

428

name = 'products'

429

430

# Create product with nested comments

431

product = Product(

432

name='Wireless Headphones',

433

description='High-quality wireless headphones with noise cancellation',

434

category='electronics',

435

price=199,

436

comments=[

437

Comment(

438

author='user1',

439

content='Great sound quality!',

440

created_date=datetime.now(),

441

rating=5

442

),

443

Comment(

444

author='user2',

445

content='Good value for money',

446

created_date=datetime.now(),

447

rating=4

448

)

449

]

450

)

451

452

product.save(using=client)

453

```

454

455

### Custom Field Types and Validation

456

457

```python

458

from opensearchpy import Document, Field, ValidationException

459

from opensearchpy import Text, Keyword, Integer

460

461

class EmailField(Keyword):

462

def clean(self, data):

463

if data and '@' not in data:

464

raise ValidationException('Invalid email format')

465

return super().clean(data)

466

467

class User(Document):

468

username = Keyword(required=True)

469

email = EmailField(required=True)

470

full_name = Text()

471

age = Integer()

472

bio = Text()

473

474

def clean(self):

475

# Document-level validation

476

if self.age and self.age < 0:

477

raise ValidationException('Age cannot be negative')

478

479

if self.username and len(self.username) < 3:

480

raise ValidationException('Username must be at least 3 characters')

481

482

def save(self, **kwargs):

483

# Custom save logic

484

self.clean()

485

return super().save(**kwargs)

486

487

class Index:

488

name = 'users'

489

490

# Create user with validation

491

user = User(

492

username='johndoe',

493

email='john@example.com',

494

full_name='John Doe',

495

age=30,

496

bio='Software developer interested in search technologies'

497

)

498

499

user.save(using=client)

500

```

501

502

### Document Relationships

503

504

```python

505

from opensearchpy import Document, Join, Text, Keyword, Integer

506

507

class BlogPost(Document):

508

title = Text()

509

content = Text()

510

author = Keyword()

511

post_comment = Join(relations={'post': 'comment'})

512

513

class Index:

514

name = 'blog'

515

516

class Comment(Document):

517

content = Text()

518

author = Keyword()

519

post_comment = Join(relations={'post': 'comment'})

520

521

class Index:

522

name = 'blog'

523

524

# Create parent document (blog post)

525

post = BlogPost(

526

title='My First Blog Post',

527

content='This is the content of my first blog post...',

528

author='blogger',

529

post_comment={'name': 'post'}

530

)

531

post.meta.id = 'post-1'

532

post.save(using=client)

533

534

# Create child document (comment)

535

comment = Comment(

536

content='Great post!',

537

author='reader',

538

post_comment={'name': 'comment', 'parent': 'post-1'}

539

)

540

comment.meta.id = 'comment-1'

541

comment.meta.routing = 'post-1' # Route to same shard as parent

542

comment.save(using=client)

543

```

544

545

### Dynamic Document Templates

546

547

```python

548

from opensearchpy import Document, DynamicDocument, Text, Keyword

549

550

class FlexibleDocument(DynamicDocument):

551

"""Document that accepts any fields dynamically."""

552

title = Text(required=True)

553

category = Keyword()

554

555

class Index:

556

name = 'flexible_docs'

557

settings = {

558

'mappings': {

559

'dynamic': True,

560

'dynamic_templates': [

561

{

562

'strings_as_keywords': {

563

'match_mapping_type': 'string',

564

'mapping': {

565

'type': 'keyword'

566

}

567

}

568

}

569

]

570

}

571

}

572

573

# Create document with dynamic fields

574

doc = FlexibleDocument(

575

title='Dynamic Document',

576

category='example',

577

# These fields will be added dynamically

578

custom_field='custom_value',

579

numerical_data=42,

580

metadata={'version': '1.0', 'source': 'api'}

581

)

582

583

doc.save(using=client)

584

```

585

586

### Bulk Document Operations

587

588

```python

589

from opensearchpy.helpers import parallel_bulk

590

591

def generate_articles(count=1000):

592

"""Generate article documents."""

593

for i in range(count):

594

article = Article(

595

title=f'Article {i}',

596

content=f'Content for article {i}...',

597

author=f'author_{i % 10}',

598

category='technology',

599

published_date=datetime.now(),

600

view_count=0,

601

tags=['tag1', 'tag2']

602

)

603

article.meta.id = f'article-{i}'

604

yield article.to_dict(include_meta=True)

605

606

# Bulk save articles

607

for success, info in parallel_bulk(

608

client,

609

generate_articles(1000),

610

index='articles',

611

chunk_size=100

612

):

613

if not success:

614

print(f'Failed to index: {info}')

615

616

print('Bulk indexing completed')

617

```

618

619

### Search with Document Models

620

621

```python

622

from opensearchpy import Search, Q

623

624

# Search using document model

625

s = Article.search(using=client)

626

s = s.query(Q('match', title='OpenSearch'))

627

s = s.filter(Q('term', category='technology'))

628

s = s.sort('-published_date')

629

630

# Execute search and get document instances

631

response = s.execute()

632

633

for article in response:

634

print(f'Title: {article.title}')

635

print(f'Author: {article.author}')

636

print(f'Published: {article.published_date}')

637

print('---')

638

639

# Aggregations with document models

640

s = Article.search(using=client)

641

s.aggs.bucket('authors', 'terms', field='author', size=10)

642

s.aggs.bucket('categories', 'terms', field='category')

643

644

response = s.execute()

645

646

print('Top authors:')

647

for bucket in response.aggregations.authors.buckets:

648

print(f' {bucket.key}: {bucket.doc_count} articles')

649

```

650

651

### Index Management with Documents

652

653

```python

654

from opensearchpy import Index

655

656

# Create index with custom settings

657

index = Index('articles', using=client)

658

index.settings(

659

number_of_shards=2,

660

number_of_replicas=1,

661

analysis={

662

'analyzer': {

663

'custom_text_analyzer': {

664

'type': 'custom',

665

'tokenizer': 'standard',

666

'filter': ['lowercase', 'stop', 'snowball']

667

}

668

}

669

}

670

)

671

672

# Register document type

673

index.doc_type(Article)

674

675

# Create the index

676

if not index.exists():

677

index.create()

678

print('Index created successfully')

679

680

# Update mapping for existing index

681

from opensearchpy import Mapping, Text, Keyword

682

683

mapping = Mapping()

684

mapping.field('title', Text(analyzer='custom_text_analyzer'))

685

mapping.field('summary', Text())

686

mapping.field('status', Keyword())

687

688

mapping.save('articles', using=client)

689

print('Mapping updated')

690

```