0
# PyES Mappings and Schema Management
1
2
## Overview
3
4
PyES provides comprehensive mapping management for defining ElasticSearch index schemas. Mappings define how documents and their fields are stored and indexed, including field types, analyzers, and indexing options. Proper mapping design is crucial for search performance, data integrity, and storage efficiency.
5
6
## Core Mapping Classes
7
8
### Mapper
9
10
```python { .api }
11
class Mapper:
    """
    Main mapping management class for ElasticSearch indices.

    Handles document type mappings, field definitions, and schema operations.
    """

    def __init__(self):
        """Initialize Mapper instance."""
        pass

    def get_doctype(self, name):
        """
        Get document type mapping by name.

        Args:
            name (str): Document type name

        Returns:
            DocumentObjectField: Document type mapping
        """
        pass

    def to_dict(self):
        """
        Convert mapper to dictionary format.

        Returns:
            dict: Mapping dictionary for ElasticSearch
        """
        pass

    def add_property(self, name, field):
        """
        Add field property to mapping.

        Args:
            name (str): Field name
            field (AbstractField): Field definition
        """
        pass

    def create_index_if_missing(self, index_name):
        """
        Create index if it doesn't exist.

        Args:
            index_name (str): Index name to create
        """
        pass
61
62
# Basic mapper usage
from pyes import Mapper, StringField, IntegerField, DateField

# Create mapping for blog posts
blog_mapping = Mapper()
blog_mapping.add_property("title", StringField(analyzer="standard"))
blog_mapping.add_property("content", StringField(analyzer="english"))
blog_mapping.add_property("view_count", IntegerField())
blog_mapping.add_property("published_date", DateField())

# Apply mapping to index
# NOTE(review): `es` is not defined in this snippet; it is assumed to be an
# already-connected client object created elsewhere -- confirm.
es.indices.put_mapping("blog_post", blog_mapping.to_dict(), indices=["blog"])
74
```
75
76
## Base Field Classes
77
78
### AbstractField
79
80
```python { .api }
81
class AbstractField:
    """
    Base class for all field types.

    Defines common field properties and behavior.
    """

    def __init__(self, index=None, store=None, boost=None,
                 null_value=None, include_in_all=None, **kwargs):
        """
        Initialize base field.

        Args:
            index (str, optional): Index option (analyzed, not_analyzed, no)
            store (bool, optional): Store field value separately
            boost (float, optional): Field boost factor for scoring
            null_value (any, optional): Default value for null fields
            include_in_all (bool, optional): Include field in _all field
            **kwargs: Additional field-specific parameters
        """
        pass

    def as_dict(self):
        """
        Convert field to dictionary representation.

        Returns:
            dict: Field definition for ElasticSearch mapping
        """
        pass
111
```
112
113
## String and Text Fields
114
115
### StringField
116
117
```python { .api }
118
class StringField(AbstractField):
    """
    String/text field for textual content.

    Supports full-text search, analysis, and various string operations.
    """

    def __init__(self, analyzer=None, index_analyzer=None, search_analyzer=None,
                 index=None, store=None, term_vector=None, boost=None,
                 null_value=None, omit_norms=None, omit_term_freq_and_positions=None,
                 include_in_all=None, **kwargs):
        """
        Initialize StringField.

        Args:
            analyzer (str, optional): Analyzer for indexing and searching
            index_analyzer (str, optional): Analyzer for indexing only
            search_analyzer (str, optional): Analyzer for searching only
            index (str, optional): Index option (analyzed, not_analyzed, no)
            store (bool, optional): Store original field value
            term_vector (str, optional): Term vector option (no, yes, with_offsets,
                with_positions, with_positions_offsets)
            boost (float, optional): Field boost for relevance
            null_value (str, optional): Default value for null
            omit_norms (bool, optional): Omit field-length normalization
            omit_term_freq_and_positions (bool, optional): Omit term frequency/positions
            include_in_all (bool, optional): Include in _all field
            **kwargs: Additional string field parameters
        """
        pass
148
149
# Text field configurations
from pyes import StringField

# Full-text search field with English analyzer
content_field = StringField(
    analyzer="english",
    term_vector="with_positions_offsets",  # For highlighting
    store=False  # Don't store original (use _source)
)

# Exact-match keyword field
category_field = StringField(
    index="not_analyzed",  # No analysis for exact matching
    store=True,
    boost=1.5
)

# Field indexed with one analyzer and searched with another
title_field = StringField(
    analyzer="standard",
    search_analyzer="english",  # Different analyzer for search
    include_in_all=True
)

# Non-indexed field for display only
description_field = StringField(
    index="no",  # Not searchable
    store=True  # But stored for retrieval
)
178
```
179
180
## Numeric Fields
181
182
### Base Numeric Field
183
184
```python { .api }
185
class NumericFieldAbstract(AbstractField):
    """
    Base class for numeric field types.

    Provides common numeric field functionality.
    """

    def __init__(self, precision_step=None, **kwargs):
        """
        Initialize numeric field.

        Args:
            precision_step (int, optional): Precision step for range queries
            **kwargs: Additional numeric field parameters
        """
        pass
201
```
202
203
### Integer Fields
204
205
```python { .api }
206
class IntegerField(NumericFieldAbstract):
    """
    32-bit signed integer field (-2^31 to 2^31-1).
    """

    def __init__(self, **kwargs):
        """Initialize IntegerField."""
        pass
214
215
class LongField(NumericFieldAbstract):
    """
    64-bit signed integer field (-2^63 to 2^63-1).
    """

    def __init__(self, **kwargs):
        """Initialize LongField."""
        pass
223
224
class ShortField(NumericFieldAbstract):
    """
    16-bit signed integer field (-32,768 to 32,767).
    """

    def __init__(self, **kwargs):
        """Initialize ShortField."""
        pass
232
233
class ByteField(NumericFieldAbstract):
    """
    8-bit signed integer field (-128 to 127).
    """

    def __init__(self, **kwargs):
        """Initialize ByteField."""
        pass
241
242
# Integer field usage
from pyes import IntegerField, LongField, ShortField, ByteField

# Standard counters and IDs
user_id_field = IntegerField()
view_count_field = IntegerField(null_value=0)

# Large numbers (timestamps, large counters)
timestamp_field = LongField()
total_bytes_field = LongField()

# Small numbers (status codes, categories)
status_code_field = ShortField()
priority_field = ByteField(null_value=0)
256
```
257
258
### Floating Point Fields
259
260
```python { .api }
261
class FloatField(NumericFieldAbstract):
    """
    32-bit floating point field (IEEE 754).
    """

    def __init__(self, **kwargs):
        """Initialize FloatField."""
        pass
269
270
class DoubleField(NumericFieldAbstract):
    """
    64-bit floating point field (IEEE 754).
    """

    def __init__(self, **kwargs):
        """Initialize DoubleField."""
        pass
278
279
# Floating point usage
from pyes import FloatField, DoubleField

# Standard precision
price_field = FloatField(null_value=0.0)
rating_field = FloatField()

# High precision calculations
latitude_field = DoubleField()
longitude_field = DoubleField()
precise_calculation_field = DoubleField()
290
```
291
292
## Specialized Fields
293
294
### Date Field
295
296
```python { .api }
297
class DateField(AbstractField):
    """
    Date and datetime field with flexible format support.
    """

    def __init__(self, format=None, precision_step=None, **kwargs):
        """
        Initialize DateField.

        Args:
            format (str, optional): Date format pattern(s)
            precision_step (int, optional): Precision step for range queries
            **kwargs: Additional date field parameters
        """
        pass
312
313
# Date field configurations
from pyes import DateField

# ISO date format (default)
published_date_field = DateField()

# Custom date format
custom_date_field = DateField(format="yyyy-MM-dd HH:mm:ss")

# Multiple date formats ("||" separates the alternatives)
flexible_date_field = DateField(
    format="yyyy-MM-dd||yyyy-MM-dd HH:mm:ss||epoch_millis"
)

# Date with precision step for better range performance
timestamp_field = DateField(
    precision_step=4,  # Better range query performance
    format="epoch_millis"
)
332
```
333
334
### Boolean Field
335
336
```python { .api }
337
class BooleanField(AbstractField):
    """
    Boolean field for true/false values.
    """

    def __init__(self, **kwargs):
        """
        Initialize BooleanField.

        Args:
            **kwargs: Additional boolean field parameters
        """
        pass
350
351
# Boolean field usage
from pyes import BooleanField

# Simple boolean flags
is_published_field = BooleanField(null_value=False)
featured_field = BooleanField()
is_active_field = BooleanField(null_value=True)
358
```
359
360
### Binary Field
361
362
```python { .api }
363
class BinaryField(AbstractField):
    """
    Binary data field for storing base64-encoded binary data.
    """

    def __init__(self, **kwargs):
        """
        Initialize BinaryField.

        Args:
            **kwargs: Additional binary field parameters
        """
        pass
376
377
# Binary data storage
from pyes import BinaryField

# File attachments
file_content_field = BinaryField(store=True)
thumbnail_field = BinaryField()
encrypted_data_field = BinaryField()
384
```
385
386
### IP Address Field
387
388
```python { .api }
389
class IpField(AbstractField):
    """
    IP address field for IPv4 addresses.
    """

    def __init__(self, **kwargs):
        """
        Initialize IpField.

        Args:
            **kwargs: Additional IP field parameters
        """
        pass
402
403
# IP address tracking
from pyes import IpField

# Network addresses
client_ip_field = IpField()
server_ip_field = IpField()
proxy_ip_field = IpField()
410
```
411
412
## Geospatial Fields
413
414
### Geo Point Field
415
416
```python { .api }
417
class GeoPointField(AbstractField):
    """
    Geographic point field for latitude/longitude coordinates.
    """

    def __init__(self, lat_lon=None, geohash=None, geohash_precision=None, **kwargs):
        """
        Initialize GeoPointField.

        Args:
            lat_lon (bool, optional): Enable lat/lon format
            geohash (bool, optional): Enable geohash format
            geohash_precision (int, optional): Geohash precision level
            **kwargs: Additional geo point parameters
        """
        pass
433
434
# Geographic location fields
from pyes import GeoPointField

# Basic location tracking
location_field = GeoPointField()

# Location with geohash support for proximity searches
restaurant_location_field = GeoPointField(
    lat_lon=True,
    geohash=True,
    geohash_precision=12
)

# Event location
event_coordinates_field = GeoPointField(lat_lon=True)
449
```
450
451
## Complex Field Types
452
453
### Multi Field
454
455
```python { .api }
456
class MultiField(AbstractField):
    """
    Multi-field mapping for analyzing the same content in different ways.

    Allows a field to be indexed multiple times with different analyzers.
    """

    def __init__(self, name, type=None, path="just_name", fields=None, **kwargs):
        """
        Initialize MultiField.

        Args:
            name (str): Field name
            type (str, optional): Main field type
            path (str): Path type for field names. Default: "just_name"
            fields (dict, optional): Sub-field definitions
            **kwargs: Additional multi-field parameters
        """
        pass
475
476
# Multi-field for different analysis approaches
from pyes import MultiField, StringField

# Title field with both analyzed and exact versions
title_multifield = MultiField("title", type="string", fields={
    "analyzed": StringField(analyzer="english"),
    "exact": StringField(index="not_analyzed"),
    "suggest": StringField(analyzer="simple")
})

# Name field with different analyzers
# ("phonetic_analyzer" is not a built-in name here; it is presumably a custom
# analyzer that must be defined in the index settings -- confirm)
name_multifield = MultiField("name", type="string", fields={
    "standard": StringField(analyzer="standard"),
    "keyword": StringField(index="not_analyzed"),
    "phonetic": StringField(analyzer="phonetic_analyzer")
})
492
```
493
494
### Object Field
495
496
```python { .api }
497
class ObjectField(AbstractField):
    """
    Object field for nested JSON objects with properties.
    """

    def __init__(self, properties=None, dynamic=None, enabled=None,
                 include_in_all=None, **kwargs):
        """
        Initialize ObjectField.

        Args:
            properties (dict, optional): Object property definitions
            dynamic (bool|str, optional): Dynamic mapping behavior
            enabled (bool, optional): Enable/disable object indexing
            include_in_all (bool, optional): Include in _all field
            **kwargs: Additional object field parameters
        """
        pass
515
516
# Nested object structures
# StringField is imported too because the property definitions below use it.
from pyes import ObjectField, StringField

# Address object with properties
address_field = ObjectField(properties={
    "street": StringField(),
    "city": StringField(index="not_analyzed"),
    "state": StringField(index="not_analyzed"),
    "zip_code": StringField(index="not_analyzed"),
    "country": StringField(index="not_analyzed")
})

# User profile object
profile_field = ObjectField(
    dynamic=True,  # Allow new properties
    properties={
        "display_name": StringField(analyzer="standard"),
        "bio": StringField(analyzer="english"),
        "avatar_url": StringField(index="no"),
        "social_links": ObjectField(enabled=False)  # Store but don't index
    }
)
538
```
539
540
### Nested Object Field
541
542
```python { .api }
543
class NestedObject(AbstractField):
    """
    Nested object field that maintains object relationships.

    Unlike ObjectField, NestedObject preserves the relationship between
    properties within the same nested object.
    """

    def __init__(self, properties=None, dynamic=None, include_in_all=None, **kwargs):
        """
        Initialize NestedObject.

        Args:
            properties (dict, optional): Nested object property definitions
            dynamic (bool|str, optional): Dynamic mapping behavior
            include_in_all (bool, optional): Include in _all field
            **kwargs: Additional nested object parameters
        """
        pass
562
563
# Nested objects with preserved relationships
# The field classes used inside the property dicts are imported as well so
# the snippet is self-contained.
from pyes import (NestedObject, StringField, IntegerField, FloatField,
                  BooleanField, DateField)

# Product variants as nested objects
variants_field = NestedObject(properties={
    "sku": StringField(index="not_analyzed"),
    "color": StringField(index="not_analyzed"),
    "size": StringField(index="not_analyzed"),
    "price": FloatField(),
    "stock_quantity": IntegerField(),
    "is_available": BooleanField()
})

# Comment threads as nested objects
comments_field = NestedObject(properties={
    "author": StringField(index="not_analyzed"),
    "content": StringField(analyzer="english"),
    "timestamp": DateField(),
    "rating": IntegerField(),
    "is_approved": BooleanField()
})
584
```
585
586
### Document Object Field
587
588
```python { .api }
589
class DocumentObjectField:
    """
    Document-level mapping definition.

    Represents the top-level mapping for a document type.
    """

    def __init__(self, name=None, **kwargs):
        """
        Initialize DocumentObjectField.

        Args:
            name (str, optional): Document type name
            **kwargs: Document-level mapping parameters
        """
        pass

    def add_property(self, name, field):
        """
        Add property to document mapping.

        Args:
            name (str): Property name
            field (AbstractField): Field definition
        """
        pass
615
616
# Complete document mapping
# Every field class used below is imported so the snippet is self-contained.
from pyes import (DocumentObjectField, StringField, IntegerField, FloatField,
                  BooleanField, DateField, GeoPointField)

# Blog post document mapping
blog_post_mapping = DocumentObjectField("blog_post")
blog_post_mapping.add_property("title", StringField(analyzer="english", boost=2.0))
blog_post_mapping.add_property("content", StringField(analyzer="english"))
blog_post_mapping.add_property("summary", StringField(analyzer="english"))
blog_post_mapping.add_property("author", StringField(index="not_analyzed"))
blog_post_mapping.add_property("category", StringField(index="not_analyzed"))
blog_post_mapping.add_property("tags", StringField(index="not_analyzed"))
blog_post_mapping.add_property("published_date", DateField())
blog_post_mapping.add_property("view_count", IntegerField(null_value=0))
blog_post_mapping.add_property("rating", FloatField())
blog_post_mapping.add_property("is_featured", BooleanField(null_value=False))
blog_post_mapping.add_property("location", GeoPointField())
632
```
633
634
### Attachment Field
635
636
```python { .api }
637
class AttachmentField(AbstractField):
    """
    Attachment field for file content extraction and indexing.

    Requires mapper-attachments plugin for ElasticSearch.
    """

    def __init__(self, **kwargs):
        """
        Initialize AttachmentField.

        Args:
            **kwargs: Additional attachment field parameters
        """
        pass
652
653
# File attachment indexing
from pyes import AttachmentField

# Document attachment with extracted content
file_attachment_field = AttachmentField()

# The attachment field will automatically extract:
# - content: Extracted text content
# - title: Document title
# - author: Document author
# - keywords: Document keywords
# - date: Document creation date
# - content_type: File MIME type
# - content_length: File size
# - language: Detected language
668
```
669
670
## Complete Mapping Examples
671
672
### E-commerce Product Mapping
673
674
```python { .api }
675
# Comprehensive e-commerce product mapping
676
from pyes import (Mapper, StringField, IntegerField, FloatField, BooleanField,
677
DateField, MultiField, NestedObject, ObjectField)
678
679
def create_product_mapping():
    """
    Create comprehensive product mapping for e-commerce.

    Returns:
        Mapper: Mapper populated with the product field definitions
    """
    mapping = Mapper()

    # Basic product information
    mapping.add_property("name", MultiField("name", type="string", fields={
        "analyzed": StringField(analyzer="english", boost=2.0),
        "exact": StringField(index="not_analyzed"),
        "suggest": StringField(analyzer="simple")
    }))

    mapping.add_property("description", StringField(
        analyzer="english",
        term_vector="with_positions_offsets"
    ))

    mapping.add_property("sku", StringField(index="not_analyzed"))
    mapping.add_property("brand", StringField(index="not_analyzed", boost=1.5))
    mapping.add_property("category", StringField(index="not_analyzed"))

    # Pricing and inventory
    mapping.add_property("price", FloatField())
    mapping.add_property("sale_price", FloatField())
    mapping.add_property("cost", FloatField())
    mapping.add_property("stock_quantity", IntegerField(null_value=0))
    mapping.add_property("is_in_stock", BooleanField())
    mapping.add_property("low_stock_threshold", IntegerField())

    # Product status
    mapping.add_property("is_active", BooleanField(null_value=True))
    mapping.add_property("is_featured", BooleanField(null_value=False))
    mapping.add_property("is_on_sale", BooleanField(null_value=False))

    # Dates
    mapping.add_property("created_date", DateField())
    mapping.add_property("updated_date", DateField())
    mapping.add_property("launch_date", DateField())
    mapping.add_property("discontinue_date", DateField())

    # Ratings and reviews
    mapping.add_property("average_rating", FloatField())
    mapping.add_property("review_count", IntegerField(null_value=0))
    mapping.add_property("total_sales", IntegerField(null_value=0))

    # Product variants as nested objects
    mapping.add_property("variants", NestedObject(properties={
        "sku": StringField(index="not_analyzed"),
        "color": StringField(index="not_analyzed"),
        "size": StringField(index="not_analyzed"),
        "material": StringField(index="not_analyzed"),
        "price": FloatField(),
        "stock_quantity": IntegerField(),
        "is_available": BooleanField(),
        "weight": FloatField(),
        "dimensions": ObjectField(properties={
            "length": FloatField(),
            "width": FloatField(),
            "height": FloatField()
        })
    }))

    # Product attributes (dynamic object)
    mapping.add_property("attributes", ObjectField(
        dynamic=True,  # Allow arbitrary attributes
        properties={
            "color": StringField(index="not_analyzed"),
            "size": StringField(index="not_analyzed"),
            "material": StringField(index="not_analyzed"),
            "style": StringField(index="not_analyzed")
        }
    ))

    # SEO fields
    mapping.add_property("meta_title", StringField(analyzer="english"))
    mapping.add_property("meta_description", StringField(analyzer="english"))
    mapping.add_property("keywords", StringField(analyzer="keyword"))
    mapping.add_property("url_slug", StringField(index="not_analyzed"))

    # Images
    mapping.add_property("primary_image", StringField(index="no"))
    mapping.add_property("gallery_images", StringField(index="no"))

    return mapping
763
764
# Apply product mapping
# NOTE(review): `es` is not defined in this snippet; it is assumed to be an
# already-connected client object created elsewhere -- confirm.
product_mapping = create_product_mapping()
es.indices.put_mapping("product", product_mapping.to_dict(), indices=["catalog"])
767
```
768
769
### User Profile Mapping
770
771
```python { .api }
772
# User profile and account mapping
773
def create_user_mapping():
    """
    Create comprehensive user profile mapping.

    Returns:
        Mapper: Mapper populated with the user profile field definitions
    """
    mapping = Mapper()

    # Basic user information
    mapping.add_property("username", StringField(index="not_analyzed"))
    mapping.add_property("email", StringField(index="not_analyzed"))
    mapping.add_property("first_name", StringField(analyzer="standard"))
    mapping.add_property("last_name", StringField(analyzer="standard"))

    # Full name with multi-field analysis
    mapping.add_property("full_name", MultiField("full_name", type="string", fields={
        "analyzed": StringField(analyzer="standard"),
        "exact": StringField(index="not_analyzed"),
        "phonetic": StringField(analyzer="phonetic")  # Custom analyzer needed
    }))

    # Profile information
    mapping.add_property("bio", StringField(analyzer="english"))
    mapping.add_property("title", StringField(analyzer="standard"))
    mapping.add_property("company", StringField(index="not_analyzed"))
    mapping.add_property("department", StringField(index="not_analyzed"))

    # Contact information
    mapping.add_property("phone", StringField(index="not_analyzed"))
    mapping.add_property("mobile", StringField(index="not_analyzed"))
    mapping.add_property("website", StringField(index="no"))

    # Address as nested object
    mapping.add_property("addresses", NestedObject(properties={
        "type": StringField(index="not_analyzed"),  # home, work, billing
        "street": StringField(),
        "city": StringField(index="not_analyzed"),
        "state": StringField(index="not_analyzed"),
        "postal_code": StringField(index="not_analyzed"),
        "country": StringField(index="not_analyzed"),
        "is_primary": BooleanField()
    }))

    # User status and flags
    mapping.add_property("is_active", BooleanField(null_value=True))
    mapping.add_property("is_verified", BooleanField(null_value=False))
    mapping.add_property("is_premium", BooleanField(null_value=False))
    mapping.add_property("account_type", StringField(index="not_analyzed"))

    # Dates and timestamps
    mapping.add_property("created_date", DateField())
    mapping.add_property("last_login", DateField())
    mapping.add_property("last_updated", DateField())
    mapping.add_property("birth_date", DateField())

    # Preferences and settings
    mapping.add_property("preferences", ObjectField(
        dynamic=True,
        properties={
            "language": StringField(index="not_analyzed"),
            "timezone": StringField(index="not_analyzed"),
            "notifications": ObjectField(enabled=False),  # Store but don't index
            "privacy_settings": ObjectField(enabled=False)
        }
    ))

    # Social and interests
    mapping.add_property("interests", StringField(index="not_analyzed"))
    mapping.add_property("skills", StringField(index="not_analyzed"))
    mapping.add_property("social_links", ObjectField(
        properties={
            "linkedin": StringField(index="no"),
            "twitter": StringField(index="no"),
            "github": StringField(index="no")
        }
    ))

    # Activity metrics
    mapping.add_property("login_count", IntegerField(null_value=0))
    mapping.add_property("post_count", IntegerField(null_value=0))
    mapping.add_property("reputation_score", IntegerField(null_value=0))

    return mapping
853
854
# Apply user mapping
# NOTE(review): `es` is not defined in this snippet; it is assumed to be an
# already-connected client object created elsewhere -- confirm.
user_mapping = create_user_mapping()
es.indices.put_mapping("user", user_mapping.to_dict(), indices=["users"])
857
```
858
859
### Content Management Mapping
860
861
```python { .api }
862
# CMS content mapping with rich media support
863
def create_content_mapping():
    """
    Create comprehensive content management mapping.

    Returns:
        Mapper: Mapper populated with the CMS content field definitions
    """
    mapping = Mapper()

    # Content identification
    mapping.add_property("title", MultiField("title", type="string", fields={
        "analyzed": StringField(analyzer="english", boost=3.0),
        "exact": StringField(index="not_analyzed"),
        "suggest": StringField(analyzer="simple")
    }))

    mapping.add_property("slug", StringField(index="not_analyzed"))
    mapping.add_property("content_id", StringField(index="not_analyzed"))

    # Content body with rich analysis
    mapping.add_property("content", StringField(
        analyzer="english",
        term_vector="with_positions_offsets",  # For highlighting
        store=False  # Use _source instead
    ))

    mapping.add_property("excerpt", StringField(analyzer="english"))
    mapping.add_property("summary", StringField(analyzer="english"))

    # Content metadata
    mapping.add_property("content_type", StringField(index="not_analyzed"))
    mapping.add_property("format", StringField(index="not_analyzed"))  # html, markdown, etc.
    mapping.add_property("language", StringField(index="not_analyzed"))
    mapping.add_property("word_count", IntegerField())
    mapping.add_property("reading_time", IntegerField())  # minutes

    # Authoring information
    mapping.add_property("author", ObjectField(properties={
        "id": StringField(index="not_analyzed"),
        "name": StringField(analyzer="standard"),
        "email": StringField(index="not_analyzed"),
        "bio": StringField(analyzer="english")
    }))

    mapping.add_property("contributors", NestedObject(properties={
        "id": StringField(index="not_analyzed"),
        "name": StringField(analyzer="standard"),
        "role": StringField(index="not_analyzed")  # editor, reviewer, etc.
    }))

    # Publication workflow
    mapping.add_property("status", StringField(index="not_analyzed"))
    mapping.add_property("workflow_state", StringField(index="not_analyzed"))
    mapping.add_property("is_published", BooleanField())
    mapping.add_property("is_featured", BooleanField(null_value=False))
    mapping.add_property("is_premium", BooleanField(null_value=False))

    # Dates and scheduling
    mapping.add_property("created_date", DateField())
    mapping.add_property("updated_date", DateField())
    mapping.add_property("published_date", DateField())
    mapping.add_property("scheduled_date", DateField())
    mapping.add_property("expiry_date", DateField())

    # Categorization and tagging
    mapping.add_property("category", StringField(index="not_analyzed"))
    mapping.add_property("subcategory", StringField(index="not_analyzed"))
    mapping.add_property("tags", StringField(index="not_analyzed"))
    mapping.add_property("topics", StringField(index="not_analyzed"))

    # SEO and social
    mapping.add_property("seo", ObjectField(properties={
        "meta_title": StringField(analyzer="english"),
        "meta_description": StringField(analyzer="english"),
        "keywords": StringField(analyzer="keyword"),
        "canonical_url": StringField(index="no"),
        "og_title": StringField(analyzer="english"),
        "og_description": StringField(analyzer="english"),
        "og_image": StringField(index="no")
    }))

    # Media attachments
    mapping.add_property("media", NestedObject(properties={
        "type": StringField(index="not_analyzed"),  # image, video, audio, document
        "url": StringField(index="no"),
        "title": StringField(analyzer="standard"),
        "alt_text": StringField(analyzer="english"),
        "caption": StringField(analyzer="english"),
        "file_size": IntegerField(),
        "mime_type": StringField(index="not_analyzed"),
        "dimensions": ObjectField(properties={
            "width": IntegerField(),
            "height": IntegerField()
        })
    }))

    # Engagement metrics
    mapping.add_property("view_count", IntegerField(null_value=0))
    mapping.add_property("like_count", IntegerField(null_value=0))
    mapping.add_property("share_count", IntegerField(null_value=0))
    mapping.add_property("comment_count", IntegerField(null_value=0))
    mapping.add_property("average_rating", FloatField())

    # Content relationships
    mapping.add_property("related_content", StringField(index="not_analyzed"))
    mapping.add_property("parent_content", StringField(index="not_analyzed"))
    mapping.add_property("series_id", StringField(index="not_analyzed"))

    return mapping
968
969
# Apply content mapping
# NOTE(review): `es` is not defined in this snippet; it is assumed to be an
# already-connected client object created elsewhere -- confirm.
content_mapping = create_content_mapping()
es.indices.put_mapping("content", content_mapping.to_dict(), indices=["cms"])
972
```
973
974
## Mapping Management Operations
975
976
### Dynamic Mapping Control
977
978
```python { .api }
979
# Control dynamic mapping behavior
980
def configure_dynamic_mapping():
    """
    Configure dynamic mapping settings for flexible schemas.

    Builds two raw mapping dictionaries: a strict one (shown for comparison
    only) and a template-driven dynamic one, which is applied to the
    "flexible" index and returned.

    Returns:
        dict: The dynamic mapping that was applied
    """
    # Strict mapping (no new fields allowed).
    # Illustrative only: this dict is never applied or returned below.
    strict_mapping = {
        "dynamic": "strict",
        "properties": {
            "title": {"type": "string", "analyzer": "english"},
            "content": {"type": "string", "analyzer": "english"}
        }
    }

    # Dynamic mapping with templates
    dynamic_mapping = {
        "dynamic": True,
        "dynamic_templates": [
            {
                "strings_as_keywords": {
                    "match": "*_keyword",
                    "mapping": {
                        "type": "string",
                        "index": "not_analyzed"
                    }
                }
            },
            {
                "strings_as_text": {
                    "match": "*_text",
                    "mapping": {
                        "type": "string",
                        "analyzer": "english"
                    }
                }
            },
            {
                "dates": {
                    "match": "*_date",
                    "mapping": {
                        "type": "date",
                        "format": "yyyy-MM-dd||epoch_millis"
                    }
                }
            }
        ],
        "properties": {
            # Explicit field definitions
            "id": {"type": "string", "index": "not_analyzed"}
        }
    }

    # Apply dynamic mapping
    # (`es` is not defined in this snippet; assumed to be a connected client)
    es.indices.put_mapping("flexible_doc", dynamic_mapping, indices=["flexible"])

    return dynamic_mapping
1034
1035
# Index settings for mapping optimization
1036
def create_optimized_index_settings():
    """
    Create index with optimized settings for mapping performance.

    Defines two analyzers ("custom_english" and "autocomplete"), one custom
    token filter, and a multi-field "title" mapping that uses both analyzers,
    then creates the index "optimized_index" with them.

    Returns:
        dict: The settings/mappings body passed to index creation
    """
    settings = {
        "settings": {
            "number_of_shards": 5,
            "number_of_replicas": 1,
            "analysis": {
                "analyzer": {
                    "custom_english": {
                        "type": "english",
                        "stopwords": ["the", "and", "or", "but"]
                    },
                    "autocomplete": {
                        "tokenizer": "keyword",
                        # The analyzer's token-filter list is read from the
                        # key "filter" (singular); "filters" is not recognized
                        # and would leave the analyzer without its filters.
                        "filter": ["lowercase", "edge_ngram"]
                    }
                },
                "filter": {
                    "edge_ngram": {
                        "type": "edge_ngram",
                        "min_gram": 1,
                        "max_gram": 20
                    }
                }
            }
        },
        "mappings": {
            "document": {
                "properties": {
                    "title": {
                        "type": "multi_field",
                        "fields": {
                            "analyzed": {"type": "string", "analyzer": "custom_english"},
                            "autocomplete": {"type": "string", "analyzer": "autocomplete"}
                        }
                    }
                }
            }
        }
    }

    # Create index with settings and mapping
    # (`es` is not defined in this snippet; assumed to be a connected client)
    es.indices.create_index("optimized_index", settings)

    return settings
1082
```
1083
1084
## Best Practices
1085
1086
### Performance Optimization
1087
1088
```python { .api }
1089
# Mapping performance optimization strategies
1090
def optimize_mapping_performance():
    """Build an example mapping that applies performance-oriented settings.

    Returns:
        Mapper: Mapping holding the example fields ``status``, ``count``,
            ``description``, ``category``, ``content``, ``timestamp`` and
            ``sort_field``.
    """
    mapper = Mapper()

    # --- 1. Choose the field type that fits the data ---

    # Exact-match values are left unanalyzed (cheaper than analyzed strings).
    status_field = StringField(index="not_analyzed")
    mapper.add_property("status", status_field)

    # Small numbers get the smaller numeric type (IntegerField, not LongField).
    count_field = IntegerField()
    mapper.add_property("count", count_field)

    # Display-only text: stored, but not indexed.
    description_field = StringField(index="no", store=True)
    mapper.add_property("description", description_field)

    # --- 2. Tune string fields ---

    # No scoring needed on this field, so norms are omitted
    # (saves space, faster filtering).
    category_field = StringField(index="not_analyzed", omit_norms=True)
    mapper.add_property("category", category_field)

    # Term vectors are only worth their cost when highlighting is needed;
    # the value is read back from _source rather than a stored field.
    content_field = StringField(
        analyzer="english",
        term_vector="with_positions_offsets",
        store=False,
    )
    mapper.add_property("content", content_field)

    # --- 3. Tune date fields ---

    # precision_step targets range-query performance; epoch_millis is the
    # faster format to parse.
    timestamp_field = DateField(precision_step=4, format="epoch_millis")
    mapper.add_property("timestamp", timestamp_field)

    # --- 4. doc_values for fields used in sorting/aggregation ---

    sort_field = StringField(index="not_analyzed", doc_values=True)
    mapper.add_property("sort_field", sort_field)

    return mapper
1132
1133
# Memory optimization
1134
def optimize_mapping_memory():
    """Build an example mapping tuned to reduce memory and storage use.

    Returns:
        Mapper: Mapping with ``_all`` disabled, a compressed
            ``large_content`` field, and two fields excluded from
            ``_source``.
    """
    mapper = Mapper()

    # The _all field costs space and indexing time; switch it off when it
    # is not needed.
    mapper._all = {"enabled": False}

    # Large text field: compress stored content once it exceeds 100 bytes.
    large_text_options = {
        "analyzer": "english",
        "compress": True,
        "compress_threshold": "100b",
    }
    mapper.add_property("large_content", StringField(**large_text_options))

    # Leave fields that are never needed in results out of _source.
    excluded_from_source = ["internal_field", "temp_data"]
    mapper._source = {"excludes": excluded_from_source}

    return mapper
1155
```
1156
1157
### Schema Evolution
1158
1159
```python { .api }
1160
# Handle mapping changes and schema evolution
1161
def handle_mapping_evolution():
    """Illustrate three strategies for evolving mappings over time.

    Each strategy is defined as a local function. This function only defines
    them: it calls none of them and returns ``None``. All of them use the
    ``es`` client from the surrounding scope.
    """

    # 1. Additive changes (safe)
    def add_new_field():
        """Add a new field to the existing ``document`` mapping of ``myindex``."""
        new_field_mapping = {
            "properties": {
                "new_feature": {"type": "string", "analyzer": "english"}
            }
        }
        es.indices.put_mapping("document", new_field_mapping, indices=["myindex"])

    # 2. Breaking changes (require reindexing)
    def change_field_type():
        """Change a field type by reindexing into ``myindex_v2``.

        Creates the new index with the updated mapping, copies documents
        across (transforming each one), then moves the ``current`` alias
        from the old index to the new one.
        """
        # Create new index with updated mapping
        new_mapping = create_updated_mapping()
        es.indices.create_index("myindex_v2")
        es.indices.put_mapping("document", new_mapping.to_dict(), indices=["myindex_v2"])

        # Reindex data (in production, use scroll/bulk for large datasets).
        # This is a simplified example: the search is limited to 1000 hits.
        old_docs = es.search({"query": {"match_all": {}}}, indices=["myindex"], size=1000)

        for doc in old_docs:
            # Transform document if needed, keeping the original document id
            transformed_doc = transform_document(doc)
            es.index(transformed_doc, "myindex_v2", "document", id=doc._meta.id)

        # Switch aliases in a single request. change_aliases expects each
        # command as a (command, index, alias, params) sequence, not as a
        # raw {"remove": {...}} / {"add": {...}} action dict.
        es.indices.change_aliases([
            ("remove", "myindex", "current", {}),
            ("add", "myindex_v2", "current", {}),
        ])

    # 3. Version-aware mapping
    def create_versioned_mapping():
        """Create a mapping that carries a ``_mapping_version`` field.

        Returns:
            Mapper: Mapping with ``_mapping_version`` and ``title`` fields.
        """
        versioned_mapping = Mapper()
        versioned_mapping.add_property("_mapping_version", IntegerField())
        versioned_mapping.add_property("title", StringField(analyzer="english"))
        # ... other fields

        return versioned_mapping
1208
1209
def create_updated_mapping():
    """Build the revised mapping used when evolving the schema.

    Returns:
        Mapper: Mapping with english-analyzed ``title`` and ``content``
            fields and a ``price`` field of type double.
    """
    updated = Mapper()

    # Text fields share the same definition.
    for text_field_name in ("title", "content"):
        updated.add_property(text_field_name, StringField(analyzer="english"))

    # The field whose type changed: price is now a double (was FloatField).
    updated.add_property("price", DoubleField())

    return updated
1220
1221
def transform_document(doc):
    """Transform a document in place during reindexing.

    If the document carries ``old_field``, its converted value (via
    ``transform_old_field``) is written to ``new_field`` and ``old_field``
    is removed. Documents without ``old_field`` are returned untouched.

    Args:
        doc: Document to transform; either a dict-based document or an
            object exposing its fields as attributes.

    Returns:
        The same ``doc`` object that was passed in.
    """
    if isinstance(doc, dict):
        # Dict-backed documents must be tested by key. A dict subclass that
        # resolves missing attributes to None (as pyes model objects do)
        # makes hasattr() true for every name, and deleting the absent key
        # afterwards raises KeyError.
        if 'old_field' in doc:
            doc['new_field'] = transform_old_field(doc['old_field'])
            del doc['old_field']
        return doc

    # Attribute-only objects: original behaviour.
    if hasattr(doc, 'old_field'):
        doc.new_field = transform_old_field(doc.old_field)
        delattr(doc, 'old_field')

    return doc
1230
```
1231
1232
PyES mapping management provides comprehensive control over ElasticSearch index schemas, enabling efficient data storage, fast searching, and proper data type handling while supporting schema evolution and performance optimization.