or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

bulk-operations.md · client.md · facets-aggregations.md · filters.md · index.md · mappings.md · query-dsl.md · rivers.md

docs/mappings.md

0

# PyES Mappings and Schema Management

1

2

## Overview

3

4

PyES provides comprehensive mapping management for defining ElasticSearch index schemas. Mappings define how documents and their fields are stored and indexed, including field types, analyzers, and indexing options. Proper mapping design is crucial for search performance, data integrity, and storage efficiency.

5

6

## Core Mapping Classes

7

8

### Mapper

9

10

```python { .api }

11

class Mapper:
    """
    Main mapping management class for ElasticSearch indices.

    Handles document type mappings, field definitions, and schema operations.
    """

    def __init__(self):
        """Initialize Mapper instance."""
        pass

    def get_doctype(self, name):
        """
        Get document type mapping by name.

        Args:
            name (str): Document type name

        Returns:
            DocumentObjectField: Document type mapping
        """
        pass

    def to_dict(self):
        """
        Convert mapper to dictionary format.

        Returns:
            dict: Mapping dictionary for ElasticSearch
        """
        pass

    def add_property(self, name, field):
        """
        Add field property to mapping.

        Args:
            name (str): Field name
            field (AbstractField): Field definition
        """
        pass

    def create_index_if_missing(self, index_name):
        """
        Create index if it doesn't exist.

        Args:
            index_name (str): Index name to create
        """
        pass

61

62

# Basic mapper usage
from pyes import ES, Mapper, StringField, IntegerField, DateField

# Connection used by the put_mapping call below (adjust host/port as needed)
es = ES("127.0.0.1:9200")

# Create mapping for blog posts
blog_mapping = Mapper()
blog_mapping.add_property("title", StringField(analyzer="standard"))
blog_mapping.add_property("content", StringField(analyzer="english"))
blog_mapping.add_property("view_count", IntegerField())
blog_mapping.add_property("published_date", DateField())

# Apply mapping to index
es.indices.put_mapping("blog_post", blog_mapping.to_dict(), indices=["blog"])

74

```

75

76

## Base Field Classes

77

78

### AbstractField

79

80

```python { .api }

81

class AbstractField:
    """
    Base class for all field types.

    Defines common field properties and behavior.
    """

    def __init__(self, index=None, store=None, boost=None,
                 null_value=None, include_in_all=None, **kwargs):
        """
        Initialize base field.

        Args:
            index (str, optional): Index option (analyzed, not_analyzed, no)
            store (bool, optional): Store field value separately
            boost (float, optional): Field boost factor for scoring
            null_value (any, optional): Default value for null fields
            include_in_all (bool, optional): Include field in _all field
            **kwargs: Additional field-specific parameters
        """
        pass

    def as_dict(self):
        """
        Convert field to dictionary representation.

        Returns:
            dict: Field definition for ElasticSearch mapping
        """
        pass

111

```

112

113

## String and Text Fields

114

115

### StringField

116

117

```python { .api }

118

class StringField(AbstractField):
    """
    String/text field for textual content.

    Supports full-text search, analysis, and various string operations.
    """

    def __init__(self, analyzer=None, index_analyzer=None, search_analyzer=None,
                 index=None, store=None, term_vector=None, boost=None,
                 null_value=None, omit_norms=None, omit_term_freq_and_positions=None,
                 include_in_all=None, **kwargs):
        """
        Initialize StringField.

        Args:
            analyzer (str, optional): Analyzer for indexing and searching
            index_analyzer (str, optional): Analyzer for indexing only
            search_analyzer (str, optional): Analyzer for searching only
            index (str, optional): Index option (analyzed, not_analyzed, no)
            store (bool, optional): Store original field value
            term_vector (str, optional): Term vector option (no, yes, with_offsets,
                with_positions, with_positions_offsets)
            boost (float, optional): Field boost for relevance
            null_value (str, optional): Default value for null
            omit_norms (bool, optional): Omit field-length normalization
            omit_term_freq_and_positions (bool, optional): Omit term frequency/positions
            include_in_all (bool, optional): Include in _all field
            **kwargs: Additional string field parameters
        """
        pass

148

149

# Text field configurations
from pyes import StringField

# Full-text search field with English analyzer
content_field = StringField(
    analyzer="english",
    term_vector="with_positions_offsets",  # For highlighting
    store=False  # Don't store original (use _source)
)

# Exact-match keyword field
category_field = StringField(
    index="not_analyzed",  # No analysis for exact matching
    store=True,
    boost=1.5
)

# Multi-language field with custom analyzer
title_field = StringField(
    analyzer="standard",
    search_analyzer="english",  # Different analyzer for search
    include_in_all=True
)

# Non-indexed field for display only
description_field = StringField(
    index="no",  # Not searchable
    store=True  # But stored for retrieval
)

178

```

179

180

## Numeric Fields

181

182

### Base Numeric Field

183

184

```python { .api }

185

class NumericFieldAbstract(AbstractField):
    """
    Base class for numeric field types.

    Provides common numeric field functionality.
    """

    def __init__(self, precision_step=None, **kwargs):
        """
        Initialize numeric field.

        Args:
            precision_step (int, optional): Precision step for range queries
            **kwargs: Additional numeric field parameters
        """
        pass

201

```

202

203

### Integer Fields

204

205

```python { .api }

206

class IntegerField(NumericFieldAbstract):
    """
    32-bit signed integer field (-2^31 to 2^31-1).
    """

    def __init__(self, **kwargs):
        """Initialize IntegerField."""
        pass

214

215

class LongField(NumericFieldAbstract):
    """
    64-bit signed integer field (-2^63 to 2^63-1).
    """

    def __init__(self, **kwargs):
        """Initialize LongField."""
        pass

223

224

class ShortField(NumericFieldAbstract):
    """
    16-bit signed integer field (-32,768 to 32,767).
    """

    def __init__(self, **kwargs):
        """Initialize ShortField."""
        pass

232

233

class ByteField(NumericFieldAbstract):
    """
    8-bit signed integer field (-128 to 127).
    """

    def __init__(self, **kwargs):
        """Initialize ByteField."""
        pass

241

242

# Integer field usage
from pyes import IntegerField, LongField, ShortField, ByteField

# Standard counters and IDs
user_id_field = IntegerField()
view_count_field = IntegerField(null_value=0)

# Large numbers (timestamps, large counters)
timestamp_field = LongField()
total_bytes_field = LongField()

# Small numbers (status codes, categories)
status_code_field = ShortField()
priority_field = ByteField(null_value=0)

256

```

257

258

### Floating Point Fields

259

260

```python { .api }

261

class FloatField(NumericFieldAbstract):
    """
    32-bit floating point field (IEEE 754).
    """

    def __init__(self, **kwargs):
        """Initialize FloatField."""
        pass

269

270

class DoubleField(NumericFieldAbstract):
    """
    64-bit floating point field (IEEE 754).
    """

    def __init__(self, **kwargs):
        """Initialize DoubleField."""
        pass

278

279

# Floating point usage
from pyes import FloatField, DoubleField

# Standard precision
price_field = FloatField(null_value=0.0)
rating_field = FloatField()

# High precision calculations
latitude_field = DoubleField()
longitude_field = DoubleField()
precise_calculation_field = DoubleField()

290

```

291

292

## Specialized Fields

293

294

### Date Field

295

296

```python { .api }

297

class DateField(AbstractField):
    """
    Date and datetime field with flexible format support.
    """

    def __init__(self, format=None, precision_step=None, **kwargs):
        """
        Initialize DateField.

        Args:
            format (str, optional): Date format pattern(s)
            precision_step (int, optional): Precision step for range queries
            **kwargs: Additional date field parameters
        """
        pass

312

313

# Date field configurations
from pyes import DateField

# ISO date format (default)
published_date_field = DateField()

# Custom date format
custom_date_field = DateField(format="yyyy-MM-dd HH:mm:ss")

# Multiple date formats
flexible_date_field = DateField(
    format="yyyy-MM-dd||yyyy-MM-dd HH:mm:ss||epoch_millis"
)

# Date with precision step for better range performance
timestamp_field = DateField(
    precision_step=4,  # Better range query performance
    format="epoch_millis"
)

332

```

333

334

### Boolean Field

335

336

```python { .api }

337

class BooleanField(AbstractField):
    """
    Boolean field for true/false values.
    """

    def __init__(self, **kwargs):
        """
        Initialize BooleanField.

        Args:
            **kwargs: Additional boolean field parameters
        """
        pass

350

351

# Boolean field usage
from pyes import BooleanField

# Simple boolean flags
is_published_field = BooleanField(null_value=False)
featured_field = BooleanField()
is_active_field = BooleanField(null_value=True)

358

```

359

360

### Binary Field

361

362

```python { .api }

363

class BinaryField(AbstractField):
    """
    Binary data field for storing base64-encoded binary data.
    """

    def __init__(self, **kwargs):
        """
        Initialize BinaryField.

        Args:
            **kwargs: Additional binary field parameters
        """
        pass

376

377

# Binary data storage
from pyes import BinaryField

# File attachments
file_content_field = BinaryField(store=True)
thumbnail_field = BinaryField()
encrypted_data_field = BinaryField()

384

```

385

386

### IP Address Field

387

388

```python { .api }

389

class IpField(AbstractField):
    """
    IP address field for IPv4 addresses.
    """

    def __init__(self, **kwargs):
        """
        Initialize IpField.

        Args:
            **kwargs: Additional IP field parameters
        """
        pass

402

403

# IP address tracking
from pyes import IpField

# Network addresses
client_ip_field = IpField()
server_ip_field = IpField()
proxy_ip_field = IpField()

410

```

411

412

## Geospatial Fields

413

414

### Geo Point Field

415

416

```python { .api }

417

class GeoPointField(AbstractField):
    """
    Geographic point field for latitude/longitude coordinates.
    """

    def __init__(self, lat_lon=None, geohash=None, geohash_precision=None, **kwargs):
        """
        Initialize GeoPointField.

        Args:
            lat_lon (bool, optional): Enable lat/lon format
            geohash (bool, optional): Enable geohash format
            geohash_precision (int, optional): Geohash precision level
            **kwargs: Additional geo point parameters
        """
        pass

433

434

# Geographic location fields
from pyes import GeoPointField

# Basic location tracking
location_field = GeoPointField()

# Location with geohash support for proximity searches
restaurant_location_field = GeoPointField(
    lat_lon=True,
    geohash=True,
    geohash_precision=12
)

# Event location
event_coordinates_field = GeoPointField(lat_lon=True)

449

```

450

451

## Complex Field Types

452

453

### Multi Field

454

455

```python { .api }

456

class MultiField(AbstractField):
    """
    Multi-field mapping for analyzing the same content in different ways.

    Allows a field to be indexed multiple times with different analyzers.
    """

    def __init__(self, name, type=None, path="just_name", fields=None, **kwargs):
        """
        Initialize MultiField.

        Args:
            name (str): Field name
            type (str, optional): Main field type
            path (str): Path type for field names. Default: "just_name"
            fields (dict, optional): Sub-field definitions
            **kwargs: Additional multi-field parameters
        """
        pass

475

476

# Multi-field for different analysis approaches
from pyes import MultiField, StringField

# Title field with both analyzed and exact versions
title_multifield = MultiField("title", type="string", fields={
    "analyzed": StringField(analyzer="english"),
    "exact": StringField(index="not_analyzed"),
    "suggest": StringField(analyzer="simple")
})

# Name field with different analyzers
name_multifield = MultiField("name", type="string", fields={
    "standard": StringField(analyzer="standard"),
    "keyword": StringField(index="not_analyzed"),
    "phonetic": StringField(analyzer="phonetic_analyzer")
})

492

```

493

494

### Object Field

495

496

```python { .api }

497

class ObjectField(AbstractField):
    """
    Object field for nested JSON objects with properties.
    """

    def __init__(self, properties=None, dynamic=None, enabled=None,
                 include_in_all=None, **kwargs):
        """
        Initialize ObjectField.

        Args:
            properties (dict, optional): Object property definitions
            dynamic (bool|str, optional): Dynamic mapping behavior
            enabled (bool, optional): Enable/disable object indexing
            include_in_all (bool, optional): Include in _all field
            **kwargs: Additional object field parameters
        """
        pass

515

516

# Nested object structures
from pyes import ObjectField, StringField

# Address object with properties
address_field = ObjectField(properties={
    "street": StringField(),
    "city": StringField(index="not_analyzed"),
    "state": StringField(index="not_analyzed"),
    "zip_code": StringField(index="not_analyzed"),
    "country": StringField(index="not_analyzed")
})

# User profile object
profile_field = ObjectField(
    dynamic=True,  # Allow new properties
    properties={
        "display_name": StringField(analyzer="standard"),
        "bio": StringField(analyzer="english"),
        "avatar_url": StringField(index="no"),
        "social_links": ObjectField(enabled=False)  # Store but don't index
    }
)

538

```

539

540

### Nested Object Field

541

542

```python { .api }

543

class NestedObject(AbstractField):
    """
    Nested object field that maintains object relationships.

    Unlike ObjectField, NestedObject preserves the relationship between
    properties within the same nested object.
    """

    def __init__(self, properties=None, dynamic=None, include_in_all=None, **kwargs):
        """
        Initialize NestedObject.

        Args:
            properties (dict, optional): Nested object property definitions
            dynamic (bool|str, optional): Dynamic mapping behavior
            include_in_all (bool, optional): Include in _all field
            **kwargs: Additional nested object parameters
        """
        pass

562

563

# Nested objects with preserved relationships
from pyes import (NestedObject, StringField, FloatField, IntegerField,
                  BooleanField, DateField)

# Product variants as nested objects
variants_field = NestedObject(properties={
    "sku": StringField(index="not_analyzed"),
    "color": StringField(index="not_analyzed"),
    "size": StringField(index="not_analyzed"),
    "price": FloatField(),
    "stock_quantity": IntegerField(),
    "is_available": BooleanField()
})

# Comment threads as nested objects
comments_field = NestedObject(properties={
    "author": StringField(index="not_analyzed"),
    "content": StringField(analyzer="english"),
    "timestamp": DateField(),
    "rating": IntegerField(),
    "is_approved": BooleanField()
})

584

```

585

586

### Document Object Field

587

588

```python { .api }

589

class DocumentObjectField:
    """
    Document-level mapping definition.

    Represents the top-level mapping for a document type.
    """

    def __init__(self, name=None, **kwargs):
        """
        Initialize DocumentObjectField.

        Args:
            name (str, optional): Document type name
            **kwargs: Document-level mapping parameters
        """
        pass

    def add_property(self, name, field):
        """
        Add property to document mapping.

        Args:
            name (str): Property name
            field (AbstractField): Field definition
        """
        pass

615

616

# Complete document mapping
from pyes import (DocumentObjectField, StringField, DateField, IntegerField,
                  FloatField, BooleanField, GeoPointField)

# Blog post document mapping
blog_post_mapping = DocumentObjectField("blog_post")
blog_post_mapping.add_property("title", StringField(analyzer="english", boost=2.0))
blog_post_mapping.add_property("content", StringField(analyzer="english"))
blog_post_mapping.add_property("summary", StringField(analyzer="english"))
blog_post_mapping.add_property("author", StringField(index="not_analyzed"))
blog_post_mapping.add_property("category", StringField(index="not_analyzed"))
blog_post_mapping.add_property("tags", StringField(index="not_analyzed"))
blog_post_mapping.add_property("published_date", DateField())
blog_post_mapping.add_property("view_count", IntegerField(null_value=0))
blog_post_mapping.add_property("rating", FloatField())
blog_post_mapping.add_property("is_featured", BooleanField(null_value=False))
blog_post_mapping.add_property("location", GeoPointField())

632

```

633

634

### Attachment Field

635

636

```python { .api }

637

class AttachmentField(AbstractField):
    """
    Attachment field for file content extraction and indexing.

    Requires mapper-attachments plugin for ElasticSearch.
    """

    def __init__(self, **kwargs):
        """
        Initialize AttachmentField.

        Args:
            **kwargs: Additional attachment field parameters
        """
        pass

652

653

# File attachment indexing
from pyes import AttachmentField

# Document attachment with extracted content
file_attachment_field = AttachmentField()

# The attachment field will automatically extract:
# - content: Extracted text content
# - title: Document title
# - author: Document author
# - keywords: Document keywords
# - date: Document creation date
# - content_type: File MIME type
# - content_length: File size
# - language: Detected language

668

```

669

670

## Complete Mapping Examples

671

672

### E-commerce Product Mapping

673

674

```python { .api }

675

# Comprehensive e-commerce product mapping

676

from pyes import (Mapper, StringField, IntegerField, FloatField, BooleanField,

677

DateField, MultiField, NestedObject, ObjectField)

678

679

def create_product_mapping():
    """Create comprehensive product mapping for e-commerce.

    Returns:
        Mapper: Mapper populated with the product fields added below.
    """

    mapping = Mapper()

    # Basic product information
    mapping.add_property("name", MultiField("name", type="string", fields={
        "analyzed": StringField(analyzer="english", boost=2.0),
        "exact": StringField(index="not_analyzed"),
        "suggest": StringField(analyzer="simple")
    }))

    mapping.add_property("description", StringField(
        analyzer="english",
        term_vector="with_positions_offsets"
    ))

    mapping.add_property("sku", StringField(index="not_analyzed"))
    mapping.add_property("brand", StringField(index="not_analyzed", boost=1.5))
    mapping.add_property("category", StringField(index="not_analyzed"))

    # Pricing and inventory
    mapping.add_property("price", FloatField())
    mapping.add_property("sale_price", FloatField())
    mapping.add_property("cost", FloatField())
    mapping.add_property("stock_quantity", IntegerField(null_value=0))
    mapping.add_property("is_in_stock", BooleanField())
    mapping.add_property("low_stock_threshold", IntegerField())

    # Product status
    mapping.add_property("is_active", BooleanField(null_value=True))
    mapping.add_property("is_featured", BooleanField(null_value=False))
    mapping.add_property("is_on_sale", BooleanField(null_value=False))

    # Dates
    mapping.add_property("created_date", DateField())
    mapping.add_property("updated_date", DateField())
    mapping.add_property("launch_date", DateField())
    mapping.add_property("discontinue_date", DateField())

    # Ratings and reviews
    mapping.add_property("average_rating", FloatField())
    mapping.add_property("review_count", IntegerField(null_value=0))
    mapping.add_property("total_sales", IntegerField(null_value=0))

    # Product variants as nested objects
    mapping.add_property("variants", NestedObject(properties={
        "sku": StringField(index="not_analyzed"),
        "color": StringField(index="not_analyzed"),
        "size": StringField(index="not_analyzed"),
        "material": StringField(index="not_analyzed"),
        "price": FloatField(),
        "stock_quantity": IntegerField(),
        "is_available": BooleanField(),
        "weight": FloatField(),
        "dimensions": ObjectField(properties={
            "length": FloatField(),
            "width": FloatField(),
            "height": FloatField()
        })
    }))

    # Product attributes (dynamic object)
    mapping.add_property("attributes", ObjectField(
        dynamic=True,  # Allow arbitrary attributes
        properties={
            "color": StringField(index="not_analyzed"),
            "size": StringField(index="not_analyzed"),
            "material": StringField(index="not_analyzed"),
            "style": StringField(index="not_analyzed")
        }
    ))

    # SEO fields
    mapping.add_property("meta_title", StringField(analyzer="english"))
    mapping.add_property("meta_description", StringField(analyzer="english"))
    mapping.add_property("keywords", StringField(analyzer="keyword"))
    mapping.add_property("url_slug", StringField(index="not_analyzed"))

    # Images
    mapping.add_property("primary_image", StringField(index="no"))
    mapping.add_property("gallery_images", StringField(index="no"))

    return mapping

763

764

# Apply product mapping
product_mapping = create_product_mapping()
es.indices.put_mapping("product", product_mapping.to_dict(), indices=["catalog"])

767

```

768

769

### User Profile Mapping

770

771

```python { .api }

772

# User profile and account mapping

773

def create_user_mapping():
    """Create comprehensive user profile mapping.

    Returns:
        Mapper: Mapper populated with the user fields added below.
    """

    mapping = Mapper()

    # Basic user information
    mapping.add_property("username", StringField(index="not_analyzed"))
    mapping.add_property("email", StringField(index="not_analyzed"))
    mapping.add_property("first_name", StringField(analyzer="standard"))
    mapping.add_property("last_name", StringField(analyzer="standard"))

    # Full name with multi-field analysis
    mapping.add_property("full_name", MultiField("full_name", type="string", fields={
        "analyzed": StringField(analyzer="standard"),
        "exact": StringField(index="not_analyzed"),
        "phonetic": StringField(analyzer="phonetic")  # Custom analyzer needed
    }))

    # Profile information
    mapping.add_property("bio", StringField(analyzer="english"))
    mapping.add_property("title", StringField(analyzer="standard"))
    mapping.add_property("company", StringField(index="not_analyzed"))
    mapping.add_property("department", StringField(index="not_analyzed"))

    # Contact information
    mapping.add_property("phone", StringField(index="not_analyzed"))
    mapping.add_property("mobile", StringField(index="not_analyzed"))
    mapping.add_property("website", StringField(index="no"))

    # Address as nested object
    mapping.add_property("addresses", NestedObject(properties={
        "type": StringField(index="not_analyzed"),  # home, work, billing
        "street": StringField(),
        "city": StringField(index="not_analyzed"),
        "state": StringField(index="not_analyzed"),
        "postal_code": StringField(index="not_analyzed"),
        "country": StringField(index="not_analyzed"),
        "is_primary": BooleanField()
    }))

    # User status and flags
    mapping.add_property("is_active", BooleanField(null_value=True))
    mapping.add_property("is_verified", BooleanField(null_value=False))
    mapping.add_property("is_premium", BooleanField(null_value=False))
    mapping.add_property("account_type", StringField(index="not_analyzed"))

    # Dates and timestamps
    mapping.add_property("created_date", DateField())
    mapping.add_property("last_login", DateField())
    mapping.add_property("last_updated", DateField())
    mapping.add_property("birth_date", DateField())

    # Preferences and settings
    mapping.add_property("preferences", ObjectField(
        dynamic=True,
        properties={
            "language": StringField(index="not_analyzed"),
            "timezone": StringField(index="not_analyzed"),
            "notifications": ObjectField(enabled=False),  # Store but don't index
            "privacy_settings": ObjectField(enabled=False)
        }
    ))

    # Social and interests
    mapping.add_property("interests", StringField(index="not_analyzed"))
    mapping.add_property("skills", StringField(index="not_analyzed"))
    mapping.add_property("social_links", ObjectField(
        properties={
            "linkedin": StringField(index="no"),
            "twitter": StringField(index="no"),
            "github": StringField(index="no")
        }
    ))

    # Activity metrics
    mapping.add_property("login_count", IntegerField(null_value=0))
    mapping.add_property("post_count", IntegerField(null_value=0))
    mapping.add_property("reputation_score", IntegerField(null_value=0))

    return mapping

853

854

# Apply user mapping
user_mapping = create_user_mapping()
es.indices.put_mapping("user", user_mapping.to_dict(), indices=["users"])

857

```

858

859

### Content Management Mapping

860

861

```python { .api }

862

# CMS content mapping with rich media support

863

def create_content_mapping():
    """Create comprehensive content management mapping.

    Returns:
        Mapper: Mapper populated with the CMS content fields added below.
    """

    mapping = Mapper()

    # Content identification
    mapping.add_property("title", MultiField("title", type="string", fields={
        "analyzed": StringField(analyzer="english", boost=3.0),
        "exact": StringField(index="not_analyzed"),
        "suggest": StringField(analyzer="simple")
    }))

    mapping.add_property("slug", StringField(index="not_analyzed"))
    mapping.add_property("content_id", StringField(index="not_analyzed"))

    # Content body with rich analysis
    mapping.add_property("content", StringField(
        analyzer="english",
        term_vector="with_positions_offsets",  # For highlighting
        store=False  # Use _source instead
    ))

    mapping.add_property("excerpt", StringField(analyzer="english"))
    mapping.add_property("summary", StringField(analyzer="english"))

    # Content metadata
    mapping.add_property("content_type", StringField(index="not_analyzed"))
    mapping.add_property("format", StringField(index="not_analyzed"))  # html, markdown, etc.
    mapping.add_property("language", StringField(index="not_analyzed"))
    mapping.add_property("word_count", IntegerField())
    mapping.add_property("reading_time", IntegerField())  # minutes

    # Authoring information
    mapping.add_property("author", ObjectField(properties={
        "id": StringField(index="not_analyzed"),
        "name": StringField(analyzer="standard"),
        "email": StringField(index="not_analyzed"),
        "bio": StringField(analyzer="english")
    }))

    mapping.add_property("contributors", NestedObject(properties={
        "id": StringField(index="not_analyzed"),
        "name": StringField(analyzer="standard"),
        "role": StringField(index="not_analyzed")  # editor, reviewer, etc.
    }))

    # Publication workflow
    mapping.add_property("status", StringField(index="not_analyzed"))
    mapping.add_property("workflow_state", StringField(index="not_analyzed"))
    mapping.add_property("is_published", BooleanField())
    mapping.add_property("is_featured", BooleanField(null_value=False))
    mapping.add_property("is_premium", BooleanField(null_value=False))

    # Dates and scheduling
    mapping.add_property("created_date", DateField())
    mapping.add_property("updated_date", DateField())
    mapping.add_property("published_date", DateField())
    mapping.add_property("scheduled_date", DateField())
    mapping.add_property("expiry_date", DateField())

    # Categorization and tagging
    mapping.add_property("category", StringField(index="not_analyzed"))
    mapping.add_property("subcategory", StringField(index="not_analyzed"))
    mapping.add_property("tags", StringField(index="not_analyzed"))
    mapping.add_property("topics", StringField(index="not_analyzed"))

    # SEO and social
    mapping.add_property("seo", ObjectField(properties={
        "meta_title": StringField(analyzer="english"),
        "meta_description": StringField(analyzer="english"),
        "keywords": StringField(analyzer="keyword"),
        "canonical_url": StringField(index="no"),
        "og_title": StringField(analyzer="english"),
        "og_description": StringField(analyzer="english"),
        "og_image": StringField(index="no")
    }))

    # Media attachments
    mapping.add_property("media", NestedObject(properties={
        "type": StringField(index="not_analyzed"),  # image, video, audio, document
        "url": StringField(index="no"),
        "title": StringField(analyzer="standard"),
        "alt_text": StringField(analyzer="english"),
        "caption": StringField(analyzer="english"),
        "file_size": IntegerField(),
        "mime_type": StringField(index="not_analyzed"),
        "dimensions": ObjectField(properties={
            "width": IntegerField(),
            "height": IntegerField()
        })
    }))

    # Engagement metrics
    mapping.add_property("view_count", IntegerField(null_value=0))
    mapping.add_property("like_count", IntegerField(null_value=0))
    mapping.add_property("share_count", IntegerField(null_value=0))
    mapping.add_property("comment_count", IntegerField(null_value=0))
    mapping.add_property("average_rating", FloatField())

    # Content relationships
    mapping.add_property("related_content", StringField(index="not_analyzed"))
    mapping.add_property("parent_content", StringField(index="not_analyzed"))
    mapping.add_property("series_id", StringField(index="not_analyzed"))

    return mapping

968

969

# Apply content mapping
content_mapping = create_content_mapping()
es.indices.put_mapping("content", content_mapping.to_dict(), indices=["cms"])

972

```

973

974

## Mapping Management Operations

975

976

### Dynamic Mapping Control

977

978

```python { .api }

979

# Control dynamic mapping behavior

980

def configure_dynamic_mapping():
    """Build a template-driven dynamic mapping, apply it, and return it.

    A strict mapping is also constructed for contrast, but only the dynamic
    mapping is sent to the "flexible" index and returned.

    Returns:
        dict: The dynamic mapping passed to ``es.indices.put_mapping``.
    """
    # Strict variant (no new fields allowed). Built for illustration only:
    # it is neither applied nor returned.
    strict_properties = {
        "title": {"type": "string", "analyzer": "english"},
        "content": {"type": "string", "analyzer": "english"},
    }
    strict_mapping = {"dynamic": "strict", "properties": strict_properties}

    # One template per field-name suffix: *_keyword, *_text, *_date.
    keyword_template = {
        "strings_as_keywords": {
            "match": "*_keyword",
            "mapping": {"type": "string", "index": "not_analyzed"},
        }
    }
    text_template = {
        "strings_as_text": {
            "match": "*_text",
            "mapping": {"type": "string", "analyzer": "english"},
        }
    }
    date_template = {
        "dates": {
            "match": "*_date",
            "mapping": {"type": "date", "format": "yyyy-MM-dd||epoch_millis"},
        }
    }

    # Fields declared explicitly alongside the templates.
    explicit_properties = {"id": {"type": "string", "index": "not_analyzed"}}

    dynamic_mapping = {
        "dynamic": True,
        "dynamic_templates": [keyword_template, text_template, date_template],
        "properties": explicit_properties,
    }

    # Register the dynamic mapping for the "flexible_doc" type.
    es.indices.put_mapping("flexible_doc", dynamic_mapping, indices=["flexible"])

    return dynamic_mapping

1034

1035

# Index settings for mapping optimization

1036

def create_optimized_index_settings():
    """Create "optimized_index" with custom analysis settings and a mapping.

    The index body defines two analyzers ("custom_english" and
    "autocomplete"), one edge n-gram token filter, and a "document" type
    whose "title" is a multi-field with an analyzed and an autocomplete
    sub-field.

    Returns:
        dict: The settings/mappings body passed to ``es.indices.create_index``.
    """
    settings = {
        "settings": {
            "number_of_shards": 5,
            "number_of_replicas": 1,
            "analysis": {
                "analyzer": {
                    "custom_english": {
                        "type": "english",
                        "stopwords": ["the", "and", "or", "but"]
                    },
                    "autocomplete": {
                        "type": "custom",
                        "tokenizer": "keyword",
                        # Elasticsearch reads a custom analyzer's token filters
                        # from the "filter" key; a "filters" key is not the
                        # documented parameter, so the edge n-gram filter
                        # would not be applied.
                        "filter": ["lowercase", "edge_ngram"]
                    }
                },
                "filter": {
                    "edge_ngram": {
                        "type": "edge_ngram",
                        "min_gram": 1,
                        "max_gram": 20
                    }
                }
            }
        },
        "mappings": {
            "document": {
                "properties": {
                    "title": {
                        "type": "multi_field",
                        "fields": {
                            "analyzed": {"type": "string", "analyzer": "custom_english"},
                            "autocomplete": {"type": "string", "analyzer": "autocomplete"}
                        }
                    }
                }
            }
        }
    }

    # Create the index with both settings and mapping in one call.
    es.indices.create_index("optimized_index", settings)

    return settings

1082

```

1083

1084

## Best Practices

1085

1086

### Performance Optimization

1087

1088

```python { .api }

1089

# Mapping performance optimization strategies

1090

def optimize_mapping_performance():
    """Assemble a Mapper that illustrates performance-minded field choices.

    Returns:
        Mapper: Mapping holding the example fields registered below.
    """
    optimized_mapping = Mapper()
    register = optimized_mapping.add_property

    # --- 1. Pick field types deliberately ---
    # Exact-match value: indexed without analysis.
    register("status", StringField(index="not_analyzed"))

    # Small numbers: integer rather than long.
    register("count", IntegerField())

    # Display-only text: stored but not indexed.
    register("description", StringField(index="no", store=True))

    # --- 2. Tune string fields ---
    # Norms omitted for a field that does not need scoring.
    category_field = StringField(index="not_analyzed", omit_norms=True)
    register("category", category_field)

    # Term vectors requested only because highlighting is assumed to be needed;
    # not stored separately, so the value comes from _source.
    content_field = StringField(
        analyzer="english",
        term_vector="with_positions_offsets",
        store=False,
    )
    register("content", content_field)

    # --- 3. Tune date fields ---
    # precision_step and an epoch_millis format, intended to help range
    # queries and parsing respectively.
    timestamp_field = DateField(precision_step=4, format="epoch_millis")
    register("timestamp", timestamp_field)

    # --- 4. doc_values for a field used in sorting/aggregation ---
    sort_field = StringField(index="not_analyzed", doc_values=True)
    register("sort_field", sort_field)

    return optimized_mapping

1132

1133

# Memory optimization

1134

def optimize_mapping_memory():
    """Assemble a Mapper configured with memory-saving options.

    Returns:
        Mapper: Mapping with ``_all`` and ``_source`` attributes set and one
        compressed text field registered.
    """
    memory_mapping = Mapper()

    # Turn the _all field off when it is not needed.
    # NOTE(review): set as a plain attribute on the Mapper -- confirm that
    # the serialized mapping picks it up.
    memory_mapping._all = {"enabled": False}

    # Large text field with compression requested above a 100-byte threshold.
    large_content_field = StringField(
        analyzer="english",
        compress=True,
        compress_threshold="100b",
    )
    memory_mapping.add_property("large_content", large_content_field)

    # List the fields to leave out of _source (an excludes list, not a full
    # disable of _source).
    excluded_fields = ["internal_field", "temp_data"]
    memory_mapping._source = {"excludes": excluded_fields}

    return memory_mapping

1155

```

1156

1157

### Schema Evolution

1158

1159

```python { .api }

1160

# Handle mapping changes and schema evolution

1161

def handle_mapping_evolution():
    """Group three example strategies for changing a mapping over time.

    The helpers are only defined inside this function; it does not call them
    and returns nothing.
    """

    # Strategy 1: additive change (safe)
    def add_new_field():
        """Put a mapping that declares one extra field on the existing index."""
        extra_properties = {
            "new_feature": {"type": "string", "analyzer": "english"},
        }
        new_field_mapping = {"properties": extra_properties}
        es.indices.put_mapping("document", new_field_mapping, indices=["myindex"])

    # Strategy 2: breaking change (requires reindexing)
    def change_field_type():
        """Reindex into a new index with the updated mapping, then swap the alias."""
        # Build the updated mapping first, then create the target index and
        # register the mapping on it.
        new_mapping = create_updated_mapping()
        es.indices.create_index("myindex_v2")
        es.indices.put_mapping(
            "document",
            new_mapping.to_dict(),
            indices=["myindex_v2"],
        )

        # Simplified copy: a single search of up to 1000 documents.
        # In production, use scroll/bulk for large datasets.
        match_all_query = {"query": {"match_all": {}}}
        old_docs = es.search(match_all_query, indices=["myindex"], size=1000)

        for doc in old_docs:
            # Transform each document if needed, keeping its original id.
            es.index(
                transform_document(doc),
                "myindex_v2",
                "document",
                id=doc._meta.id,
            )

        # Move the "current" alias from the old index to the new one.
        alias_actions = [
            {"remove": {"index": "myindex", "alias": "current"}},
            {"add": {"index": "myindex_v2", "alias": "current"}},
        ]
        es.indices.change_aliases(alias_actions)

    # Strategy 3: version-aware mapping
    def create_versioned_mapping():
        """Return a mapping that carries a version field for tracking."""
        versioned_mapping = Mapper()
        for field_name, field in (
            ("_mapping_version", IntegerField()),
            ("title", StringField(analyzer="english")),
        ):
            versioned_mapping.add_property(field_name, field)
        # ... other fields

        return versioned_mapping

1208

1209

def create_updated_mapping():
    """Build the revised mapping used when evolving the schema.

    Returns:
        Mapper: Mapping with "title", "content" and a double-typed "price".
    """
    mapping = Mapper()
    add = mapping.add_property

    # Text fields keep the english analyzer.
    add("title", StringField(analyzer="english"))
    add("content", StringField(analyzer="english"))

    # Field whose type changed: now DoubleField (previously FloatField).
    add("price", DoubleField())

    return mapping

1220

1221

def transform_document(doc):
    """Migrate a document's ``old_field`` attribute to ``new_field`` in place.

    Args:
        doc: Document object; it is mutated and returned.

    Returns:
        The same ``doc`` object, unchanged when it has no ``old_field``.
    """
    # Nothing to migrate: hand the document back untouched.
    if not hasattr(doc, 'old_field'):
        return doc

    # Example transformation: derive the new value, then drop the old attribute.
    doc.new_field = transform_old_field(doc.old_field)
    delattr(doc, 'old_field')

    return doc

1230

```

1231

1232

PyES mapping management provides comprehensive control over ElasticSearch index schemas, enabling efficient data storage, fast searching, and proper data type handling while supporting schema evolution and performance optimization.