Python ElasticSearch driver providing a Pythonic interface for interacting with ElasticSearch clusters
—
PyES provides comprehensive mapping management for defining ElasticSearch index schemas. Mappings define how documents and their fields are stored and indexed, including field types, analyzers, and indexing options. Proper mapping design is crucial for search performance, data integrity, and storage efficiency.
class Mapper:
    """
    Main mapping management class for ElasticSearch indices.

    Handles document type mappings, field definitions, and schema operations.
    The methods shown here are interface stubs (each body is ``pass``); only
    the documented contract is presented.
    """

    def __init__(self):
        """Initialize Mapper instance."""
        pass

    def get_doctype(self, name):
        """
        Get document type mapping by name.

        Args:
            name (str): Document type name

        Returns:
            DocumentObjectField: Document type mapping
        """
        pass

    def to_dict(self):
        """
        Convert mapper to dictionary format.

        Returns:
            dict: Mapping dictionary for ElasticSearch
        """
        pass

    def add_property(self, name, field):
        """
        Add field property to mapping.

        Args:
            name (str): Field name
            field (AbstractField): Field definition
        """
        pass

    def create_index_if_missing(self, index_name):
        """
        Create index if it doesn't exist.

        Args:
            index_name (str): Index name to create
        """
        pass
# Basic mapper usage
from pyes import Mapper, StringField, IntegerField, DateField

# Create mapping for blog posts
blog_mapping = Mapper()
blog_mapping.add_property("title", StringField(analyzer="standard"))
blog_mapping.add_property("content", StringField(analyzer="english"))
blog_mapping.add_property("view_count", IntegerField())
blog_mapping.add_property("published_date", DateField())

# Apply mapping to index
# NOTE: `es` is not created in this snippet; it is assumed to be an existing
# PyES client instance.
es.indices.put_mapping("blog_post", blog_mapping.to_dict(), indices=["blog"])


class AbstractField:
    """
    Base class for all field types.

    Defines common field properties and behavior.
    """

    def __init__(self, index=None, store=None, boost=None,
                 null_value=None, include_in_all=None, **kwargs):
        """
        Initialize base field.

        Args:
            index (str, optional): Index option (analyzed, not_analyzed, no)
            store (bool, optional): Store field value separately
            boost (float, optional): Field boost factor for scoring
            null_value (any, optional): Default value for null fields
            include_in_all (bool, optional): Include field in _all field
            **kwargs: Additional field-specific parameters
        """
        pass

    def as_dict(self):
        """
        Convert field to dictionary representation.

        Returns:
            dict: Field definition for ElasticSearch mapping
        """
        pass


class StringField(AbstractField):
    """
    String/text field for textual content.

    Supports full-text search, analysis, and various string operations.
    """

    def __init__(self, analyzer=None, index_analyzer=None, search_analyzer=None,
                 index=None, store=None, term_vector=None, boost=None,
                 null_value=None, omit_norms=None, omit_term_freq_and_positions=None,
                 include_in_all=None, **kwargs):
        """
        Initialize StringField.

        Args:
            analyzer (str, optional): Analyzer for indexing and searching
            index_analyzer (str, optional): Analyzer for indexing only
            search_analyzer (str, optional): Analyzer for searching only
            index (str, optional): Index option (analyzed, not_analyzed, no)
            store (bool, optional): Store original field value
            term_vector (str, optional): Term vector option (no, yes, with_offsets,
                with_positions, with_positions_offsets)
            boost (float, optional): Field boost for relevance
            null_value (str, optional): Default value for null
            omit_norms (bool, optional): Omit field-length normalization
            omit_term_freq_and_positions (bool, optional): Omit term frequency/positions
            include_in_all (bool, optional): Include in _all field
            **kwargs: Additional string field parameters
        """
        pass
# Text field configurations
from pyes import StringField

# Full-text search field with English analyzer
content_field = StringField(
    analyzer="english",
    term_vector="with_positions_offsets",  # For highlighting
    store=False  # Don't store original (use _source)
)

# Exact-match keyword field
category_field = StringField(
    index="not_analyzed",  # No analysis for exact matching
    store=True,
    boost=1.5
)

# Multi-language field with custom analyzer
title_field = StringField(
    analyzer="standard",
    search_analyzer="english",  # Different analyzer for search
    include_in_all=True
)

# Non-indexed field for display only
description_field = StringField(
    index="no",  # Not searchable
    store=True  # But stored for retrieval
)


class NumericFieldAbstract(AbstractField):
    """
    Base class for numeric field types.

    Provides common numeric field functionality.
    """

    def __init__(self, precision_step=None, **kwargs):
        """
        Initialize numeric field.

        Args:
            precision_step (int, optional): Precision step for range queries
            **kwargs: Additional numeric field parameters
        """
        pass


class IntegerField(NumericFieldAbstract):
    """
    32-bit signed integer field (-2^31 to 2^31-1).
    """

    def __init__(self, **kwargs):
        """Initialize IntegerField."""
        pass
class LongField(NumericFieldAbstract):
    """
    64-bit signed integer field (-2^63 to 2^63-1).
    """

    def __init__(self, **kwargs):
        """Initialize LongField."""
        pass
class ShortField(NumericFieldAbstract):
    """
    16-bit signed integer field (-32,768 to 32,767).
    """

    def __init__(self, **kwargs):
        """Initialize ShortField."""
        pass
class ByteField(NumericFieldAbstract):
    """
    8-bit signed integer field (-128 to 127).
    """

    def __init__(self, **kwargs):
        """Initialize ByteField."""
        pass
# Integer field usage
from pyes import IntegerField, LongField, ShortField, ByteField

# Standard counters and IDs
user_id_field = IntegerField()
view_count_field = IntegerField(null_value=0)

# Large numbers (timestamps, large counters)
timestamp_field = LongField()
total_bytes_field = LongField()

# Small numbers (status codes, categories)
status_code_field = ShortField()
priority_field = ByteField(null_value=0)


class FloatField(NumericFieldAbstract):
    """
    32-bit floating point field (IEEE 754).
    """

    def __init__(self, **kwargs):
        """Initialize FloatField."""
        pass
class DoubleField(NumericFieldAbstract):
    """
    64-bit floating point field (IEEE 754).
    """

    def __init__(self, **kwargs):
        """Initialize DoubleField."""
        pass
# Floating point usage
from pyes import FloatField, DoubleField

# Standard precision
price_field = FloatField(null_value=0.0)
rating_field = FloatField()

# High precision calculations
latitude_field = DoubleField()
longitude_field = DoubleField()
precise_calculation_field = DoubleField()


class DateField(AbstractField):
    """
    Date and datetime field with flexible format support.
    """

    def __init__(self, format=None, precision_step=None, **kwargs):
        """
        Initialize DateField.

        Args:
            format (str, optional): Date format pattern(s)
            precision_step (int, optional): Precision step for range queries
            **kwargs: Additional date field parameters
        """
        pass
# Date field configurations
from pyes import DateField

# ISO date format (default)
published_date_field = DateField()

# Custom date format
custom_date_field = DateField(format="yyyy-MM-dd HH:mm:ss")

# Multiple date formats ("||" separates the alternatives)
flexible_date_field = DateField(
    format="yyyy-MM-dd||yyyy-MM-dd HH:mm:ss||epoch_millis"
)

# Date with precision step for better range performance
timestamp_field = DateField(
    precision_step=4,  # Better range query performance
    format="epoch_millis"
)


class BooleanField(AbstractField):
    """
    Boolean field for true/false values.
    """

    def __init__(self, **kwargs):
        """
        Initialize BooleanField.

        Args:
            **kwargs: Additional boolean field parameters
        """
        pass
# Boolean field usage
from pyes import BooleanField

# Simple boolean flags
is_published_field = BooleanField(null_value=False)
featured_field = BooleanField()
is_active_field = BooleanField(null_value=True)


class BinaryField(AbstractField):
    """
    Binary data field for storing base64-encoded binary data.
    """

    def __init__(self, **kwargs):
        """
        Initialize BinaryField.

        Args:
            **kwargs: Additional binary field parameters
        """
        pass
# Binary data storage
from pyes import BinaryField

# File attachments
file_content_field = BinaryField(store=True)
thumbnail_field = BinaryField()
encrypted_data_field = BinaryField()


class IpField(AbstractField):
    """
    IP address field for IPv4 addresses.
    """

    def __init__(self, **kwargs):
        """
        Initialize IpField.

        Args:
            **kwargs: Additional IP field parameters
        """
        pass
# IP address tracking
from pyes import IpField

# Network addresses
client_ip_field = IpField()
server_ip_field = IpField()
proxy_ip_field = IpField()


class GeoPointField(AbstractField):
    """
    Geographic point field for latitude/longitude coordinates.
    """

    def __init__(self, lat_lon=None, geohash=None, geohash_precision=None, **kwargs):
        """
        Initialize GeoPointField.

        Args:
            lat_lon (bool, optional): Enable lat/lon format
            geohash (bool, optional): Enable geohash format
            geohash_precision (int, optional): Geohash precision level
            **kwargs: Additional geo point parameters
        """
        pass
# Geographic location fields
from pyes import GeoPointField

# Basic location tracking
location_field = GeoPointField()

# Location with geohash support for proximity searches
restaurant_location_field = GeoPointField(
    lat_lon=True,
    geohash=True,
    geohash_precision=12
)

# Event location
event_coordinates_field = GeoPointField(lat_lon=True)


class MultiField(AbstractField):
    """
    Multi-field mapping for analyzing the same content in different ways.

    Allows a field to be indexed multiple times with different analyzers.
    """

    def __init__(self, name, type=None, path="just_name", fields=None, **kwargs):
        """
        Initialize MultiField.

        Args:
            name (str): Field name
            type (str, optional): Main field type
            path (str): Path type for field names. Default: "just_name"
            fields (dict, optional): Sub-field definitions
            **kwargs: Additional multi-field parameters
        """
        pass
# Multi-field for different analysis approaches
from pyes import MultiField, StringField

# Title field with both analyzed and exact versions
title_multifield = MultiField("title", type="string", fields={
    "analyzed": StringField(analyzer="english"),
    "exact": StringField(index="not_analyzed"),
    "suggest": StringField(analyzer="simple")
})

# Name field with different analyzers
# NOTE: "phonetic_analyzer" is not a built-in name in this document; it is
# assumed to be a custom analyzer defined in the index settings.
name_multifield = MultiField("name", type="string", fields={
    "standard": StringField(analyzer="standard"),
    "keyword": StringField(index="not_analyzed"),
    "phonetic": StringField(analyzer="phonetic_analyzer")
})


class ObjectField(AbstractField):
    """
    Object field for nested JSON objects with properties.
    """

    def __init__(self, properties=None, dynamic=None, enabled=None,
                 include_in_all=None, **kwargs):
        """
        Initialize ObjectField.

        Args:
            properties (dict, optional): Object property definitions
            dynamic (bool|str, optional): Dynamic mapping behavior
            enabled (bool, optional): Enable/disable object indexing
            include_in_all (bool, optional): Include in _all field
            **kwargs: Additional object field parameters
        """
        pass
# Nested object structures
from pyes import ObjectField

# Address object with properties
address_field = ObjectField(properties={
    "street": StringField(),
    "city": StringField(index="not_analyzed"),
    "state": StringField(index="not_analyzed"),
    "zip_code": StringField(index="not_analyzed"),
    "country": StringField(index="not_analyzed")
})

# User profile object
profile_field = ObjectField(
    dynamic=True,  # Allow new properties
    properties={
        "display_name": StringField(analyzer="standard"),
        "bio": StringField(analyzer="english"),
        "avatar_url": StringField(index="no"),
        "social_links": ObjectField(enabled=False)  # Store but don't index
    }
)


class NestedObject(AbstractField):
    """
    Nested object field that maintains object relationships.

    Unlike ObjectField, NestedObject preserves the relationship between
    properties within the same nested object.
    """

    def __init__(self, properties=None, dynamic=None, include_in_all=None, **kwargs):
        """
        Initialize NestedObject.

        Args:
            properties (dict, optional): Nested object property definitions
            dynamic (bool|str, optional): Dynamic mapping behavior
            include_in_all (bool, optional): Include in _all field
            **kwargs: Additional nested object parameters
        """
        pass
# Nested objects with preserved relationships
from pyes import NestedObject

# Product variants as nested objects
variants_field = NestedObject(properties={
    "sku": StringField(index="not_analyzed"),
    "color": StringField(index="not_analyzed"),
    "size": StringField(index="not_analyzed"),
    "price": FloatField(),
    "stock_quantity": IntegerField(),
    "is_available": BooleanField()
})

# Comment threads as nested objects
comments_field = NestedObject(properties={
    "author": StringField(index="not_analyzed"),
    "content": StringField(analyzer="english"),
    "timestamp": DateField(),
    "rating": IntegerField(),
    "is_approved": BooleanField()
})


class DocumentObjectField:
    """
    Document-level mapping definition.

    Represents the top-level mapping for a document type.
    """

    def __init__(self, name=None, **kwargs):
        """
        Initialize DocumentObjectField.

        Args:
            name (str, optional): Document type name
            **kwargs: Document-level mapping parameters
        """
        pass

    def add_property(self, name, field):
        """
        Add property to document mapping.

        Args:
            name (str): Property name
            field (AbstractField): Field definition
        """
        pass
# Complete document mapping
from pyes import DocumentObjectField

# Blog post document mapping
blog_post_mapping = DocumentObjectField("blog_post")
blog_post_mapping.add_property("title", StringField(analyzer="english", boost=2.0))
blog_post_mapping.add_property("content", StringField(analyzer="english"))
blog_post_mapping.add_property("summary", StringField(analyzer="english"))
blog_post_mapping.add_property("author", StringField(index="not_analyzed"))
blog_post_mapping.add_property("category", StringField(index="not_analyzed"))
blog_post_mapping.add_property("tags", StringField(index="not_analyzed"))
blog_post_mapping.add_property("published_date", DateField())
blog_post_mapping.add_property("view_count", IntegerField(null_value=0))
blog_post_mapping.add_property("rating", FloatField())
blog_post_mapping.add_property("is_featured", BooleanField(null_value=False))
blog_post_mapping.add_property("location", GeoPointField())


class AttachmentField(AbstractField):
    """
    Attachment field for file content extraction and indexing.

    Requires mapper-attachments plugin for ElasticSearch.
    """

    def __init__(self, **kwargs):
        """
        Initialize AttachmentField.

        Args:
            **kwargs: Additional attachment field parameters
        """
        pass
# File attachment indexing
from pyes import AttachmentField

# Document attachment with extracted content
file_attachment_field = AttachmentField()

# The attachment field will automatically extract:
# - content: Extracted text content
# - title: Document title
# - author: Document author
# - keywords: Document keywords
# - date: Document creation date
# - content_type: File MIME type
# - content_length: File size
# - language: Detected language

# Comprehensive e-commerce product mapping
from pyes import (Mapper, StringField, IntegerField, FloatField, BooleanField,
                  DateField, MultiField, NestedObject, ObjectField)
def create_product_mapping():
    """
    Create comprehensive product mapping for e-commerce.

    Returns:
        Mapper: Mapper populated with the product fields added below
    """
    mapping = Mapper()

    # Basic product information
    mapping.add_property("name", MultiField("name", type="string", fields={
        "analyzed": StringField(analyzer="english", boost=2.0),
        "exact": StringField(index="not_analyzed"),
        "suggest": StringField(analyzer="simple")
    }))
    mapping.add_property("description", StringField(
        analyzer="english",
        term_vector="with_positions_offsets"
    ))
    mapping.add_property("sku", StringField(index="not_analyzed"))
    mapping.add_property("brand", StringField(index="not_analyzed", boost=1.5))
    mapping.add_property("category", StringField(index="not_analyzed"))

    # Pricing and inventory
    mapping.add_property("price", FloatField())
    mapping.add_property("sale_price", FloatField())
    mapping.add_property("cost", FloatField())
    mapping.add_property("stock_quantity", IntegerField(null_value=0))
    mapping.add_property("is_in_stock", BooleanField())
    mapping.add_property("low_stock_threshold", IntegerField())

    # Product status
    mapping.add_property("is_active", BooleanField(null_value=True))
    mapping.add_property("is_featured", BooleanField(null_value=False))
    mapping.add_property("is_on_sale", BooleanField(null_value=False))

    # Dates
    mapping.add_property("created_date", DateField())
    mapping.add_property("updated_date", DateField())
    mapping.add_property("launch_date", DateField())
    mapping.add_property("discontinue_date", DateField())

    # Ratings and reviews
    mapping.add_property("average_rating", FloatField())
    mapping.add_property("review_count", IntegerField(null_value=0))
    mapping.add_property("total_sales", IntegerField(null_value=0))

    # Product variants as nested objects
    mapping.add_property("variants", NestedObject(properties={
        "sku": StringField(index="not_analyzed"),
        "color": StringField(index="not_analyzed"),
        "size": StringField(index="not_analyzed"),
        "material": StringField(index="not_analyzed"),
        "price": FloatField(),
        "stock_quantity": IntegerField(),
        "is_available": BooleanField(),
        "weight": FloatField(),
        "dimensions": ObjectField(properties={
            "length": FloatField(),
            "width": FloatField(),
            "height": FloatField()
        })
    }))

    # Product attributes (dynamic object)
    mapping.add_property("attributes", ObjectField(
        dynamic=True,  # Allow arbitrary attributes
        properties={
            "color": StringField(index="not_analyzed"),
            "size": StringField(index="not_analyzed"),
            "material": StringField(index="not_analyzed"),
            "style": StringField(index="not_analyzed")
        }
    ))

    # SEO fields
    mapping.add_property("meta_title", StringField(analyzer="english"))
    mapping.add_property("meta_description", StringField(analyzer="english"))
    mapping.add_property("keywords", StringField(analyzer="keyword"))
    mapping.add_property("url_slug", StringField(index="not_analyzed"))

    # Images
    mapping.add_property("primary_image", StringField(index="no"))
    mapping.add_property("gallery_images", StringField(index="no"))

    return mapping
# Apply product mapping
# NOTE: `es` is assumed to be an existing PyES client instance.
product_mapping = create_product_mapping()
es.indices.put_mapping("product", product_mapping.to_dict(), indices=["catalog"])


# User profile and account mapping
def create_user_mapping():
    """
    Create comprehensive user profile mapping.

    Returns:
        Mapper: Mapper populated with the user profile fields added below
    """
    mapping = Mapper()

    # Basic user information
    mapping.add_property("username", StringField(index="not_analyzed"))
    mapping.add_property("email", StringField(index="not_analyzed"))
    mapping.add_property("first_name", StringField(analyzer="standard"))
    mapping.add_property("last_name", StringField(analyzer="standard"))

    # Full name with multi-field analysis
    mapping.add_property("full_name", MultiField("full_name", type="string", fields={
        "analyzed": StringField(analyzer="standard"),
        "exact": StringField(index="not_analyzed"),
        "phonetic": StringField(analyzer="phonetic")  # Custom analyzer needed
    }))

    # Profile information
    mapping.add_property("bio", StringField(analyzer="english"))
    mapping.add_property("title", StringField(analyzer="standard"))
    mapping.add_property("company", StringField(index="not_analyzed"))
    mapping.add_property("department", StringField(index="not_analyzed"))

    # Contact information
    mapping.add_property("phone", StringField(index="not_analyzed"))
    mapping.add_property("mobile", StringField(index="not_analyzed"))
    mapping.add_property("website", StringField(index="no"))

    # Address as nested object
    mapping.add_property("addresses", NestedObject(properties={
        "type": StringField(index="not_analyzed"),  # home, work, billing
        "street": StringField(),
        "city": StringField(index="not_analyzed"),
        "state": StringField(index="not_analyzed"),
        "postal_code": StringField(index="not_analyzed"),
        "country": StringField(index="not_analyzed"),
        "is_primary": BooleanField()
    }))

    # User status and flags
    mapping.add_property("is_active", BooleanField(null_value=True))
    mapping.add_property("is_verified", BooleanField(null_value=False))
    mapping.add_property("is_premium", BooleanField(null_value=False))
    mapping.add_property("account_type", StringField(index="not_analyzed"))

    # Dates and timestamps
    mapping.add_property("created_date", DateField())
    mapping.add_property("last_login", DateField())
    mapping.add_property("last_updated", DateField())
    mapping.add_property("birth_date", DateField())

    # Preferences and settings
    mapping.add_property("preferences", ObjectField(
        dynamic=True,
        properties={
            "language": StringField(index="not_analyzed"),
            "timezone": StringField(index="not_analyzed"),
            "notifications": ObjectField(enabled=False),  # Store but don't index
            "privacy_settings": ObjectField(enabled=False)
        }
    ))

    # Social and interests
    mapping.add_property("interests", StringField(index="not_analyzed"))
    mapping.add_property("skills", StringField(index="not_analyzed"))
    mapping.add_property("social_links", ObjectField(
        properties={
            "linkedin": StringField(index="no"),
            "twitter": StringField(index="no"),
            "github": StringField(index="no")
        }
    ))

    # Activity metrics
    mapping.add_property("login_count", IntegerField(null_value=0))
    mapping.add_property("post_count", IntegerField(null_value=0))
    mapping.add_property("reputation_score", IntegerField(null_value=0))

    return mapping
# Apply user mapping
# NOTE: `es` is assumed to be an existing PyES client instance.
user_mapping = create_user_mapping()
es.indices.put_mapping("user", user_mapping.to_dict(), indices=["users"])


# CMS content mapping with rich media support
def create_content_mapping():
    """
    Create comprehensive content management mapping.

    Returns:
        Mapper: Mapper populated with the CMS content fields added below
    """
    mapping = Mapper()

    # Content identification
    mapping.add_property("title", MultiField("title", type="string", fields={
        "analyzed": StringField(analyzer="english", boost=3.0),
        "exact": StringField(index="not_analyzed"),
        "suggest": StringField(analyzer="simple")
    }))
    mapping.add_property("slug", StringField(index="not_analyzed"))
    mapping.add_property("content_id", StringField(index="not_analyzed"))

    # Content body with rich analysis
    mapping.add_property("content", StringField(
        analyzer="english",
        term_vector="with_positions_offsets",  # For highlighting
        store=False  # Use _source instead
    ))
    mapping.add_property("excerpt", StringField(analyzer="english"))
    mapping.add_property("summary", StringField(analyzer="english"))

    # Content metadata
    mapping.add_property("content_type", StringField(index="not_analyzed"))
    mapping.add_property("format", StringField(index="not_analyzed"))  # html, markdown, etc.
    mapping.add_property("language", StringField(index="not_analyzed"))
    mapping.add_property("word_count", IntegerField())
    mapping.add_property("reading_time", IntegerField())  # minutes

    # Authoring information
    mapping.add_property("author", ObjectField(properties={
        "id": StringField(index="not_analyzed"),
        "name": StringField(analyzer="standard"),
        "email": StringField(index="not_analyzed"),
        "bio": StringField(analyzer="english")
    }))
    mapping.add_property("contributors", NestedObject(properties={
        "id": StringField(index="not_analyzed"),
        "name": StringField(analyzer="standard"),
        "role": StringField(index="not_analyzed")  # editor, reviewer, etc.
    }))

    # Publication workflow
    mapping.add_property("status", StringField(index="not_analyzed"))
    mapping.add_property("workflow_state", StringField(index="not_analyzed"))
    mapping.add_property("is_published", BooleanField())
    mapping.add_property("is_featured", BooleanField(null_value=False))
    mapping.add_property("is_premium", BooleanField(null_value=False))

    # Dates and scheduling
    mapping.add_property("created_date", DateField())
    mapping.add_property("updated_date", DateField())
    mapping.add_property("published_date", DateField())
    mapping.add_property("scheduled_date", DateField())
    mapping.add_property("expiry_date", DateField())

    # Categorization and tagging
    mapping.add_property("category", StringField(index="not_analyzed"))
    mapping.add_property("subcategory", StringField(index="not_analyzed"))
    mapping.add_property("tags", StringField(index="not_analyzed"))
    mapping.add_property("topics", StringField(index="not_analyzed"))

    # SEO and social
    mapping.add_property("seo", ObjectField(properties={
        "meta_title": StringField(analyzer="english"),
        "meta_description": StringField(analyzer="english"),
        "keywords": StringField(analyzer="keyword"),
        "canonical_url": StringField(index="no"),
        "og_title": StringField(analyzer="english"),
        "og_description": StringField(analyzer="english"),
        "og_image": StringField(index="no")
    }))

    # Media attachments
    mapping.add_property("media", NestedObject(properties={
        "type": StringField(index="not_analyzed"),  # image, video, audio, document
        "url": StringField(index="no"),
        "title": StringField(analyzer="standard"),
        "alt_text": StringField(analyzer="english"),
        "caption": StringField(analyzer="english"),
        "file_size": IntegerField(),
        "mime_type": StringField(index="not_analyzed"),
        "dimensions": ObjectField(properties={
            "width": IntegerField(),
            "height": IntegerField()
        })
    }))

    # Engagement metrics
    mapping.add_property("view_count", IntegerField(null_value=0))
    mapping.add_property("like_count", IntegerField(null_value=0))
    mapping.add_property("share_count", IntegerField(null_value=0))
    mapping.add_property("comment_count", IntegerField(null_value=0))
    mapping.add_property("average_rating", FloatField())

    # Content relationships
    mapping.add_property("related_content", StringField(index="not_analyzed"))
    mapping.add_property("parent_content", StringField(index="not_analyzed"))
    mapping.add_property("series_id", StringField(index="not_analyzed"))

    return mapping
# Apply content mapping
# NOTE: `es` is assumed to be an existing PyES client instance.
content_mapping = create_content_mapping()
es.indices.put_mapping("content", content_mapping.to_dict(), indices=["cms"])


# Control dynamic mapping behavior
def configure_dynamic_mapping():
    """
    Configure dynamic mapping settings for flexible schemas.

    Builds two illustrative mapping dictionaries. Only ``dynamic_mapping`` is
    sent to the cluster (via the module-level ``es`` client) and returned;
    ``strict_mapping`` is shown for comparison and is not applied here.

    Returns:
        dict: The dynamic mapping that was applied to the "flexible" index
    """
    # Strict mapping (no new fields allowed)
    strict_mapping = {
        "dynamic": "strict",
        "properties": {
            "title": {"type": "string", "analyzer": "english"},
            "content": {"type": "string", "analyzer": "english"}
        }
    }

    # Dynamic mapping with templates
    dynamic_mapping = {
        "dynamic": True,
        "dynamic_templates": [
            {
                "strings_as_keywords": {
                    "match": "*_keyword",
                    "mapping": {
                        "type": "string",
                        "index": "not_analyzed"
                    }
                }
            },
            {
                "strings_as_text": {
                    "match": "*_text",
                    "mapping": {
                        "type": "string",
                        "analyzer": "english"
                    }
                }
            },
            {
                "dates": {
                    "match": "*_date",
                    "mapping": {
                        "type": "date",
                        "format": "yyyy-MM-dd||epoch_millis"
                    }
                }
            }
        ],
        "properties": {
            # Explicit field definitions
            "id": {"type": "string", "index": "not_analyzed"}
        }
    }

    # Apply dynamic mapping
    es.indices.put_mapping("flexible_doc", dynamic_mapping, indices=["flexible"])

    return dynamic_mapping
# Index settings for mapping optimization
def create_optimized_index_settings():
    """
    Create index with optimized settings for mapping performance.

    Builds an index body containing shard/replica counts, custom analysis
    components and a "document" type mapping, then creates "optimized_index"
    through the module-level ``es`` client.

    Returns:
        dict: The settings/mappings body passed to ``create_index``
    """
    settings = {
        "settings": {
            "number_of_shards": 5,
            "number_of_replicas": 1,
            "analysis": {
                "analyzer": {
                    "custom_english": {
                        "type": "english",
                        "stopwords": ["the", "and", "or", "but"]
                    },
                    "autocomplete": {
                        # A custom analyzer lists its token filters under the
                        # key "filter" (singular); "filters" is not a
                        # recognised setting, so the filters would not be applied.
                        "type": "custom",
                        "tokenizer": "keyword",
                        "filter": ["lowercase", "edge_ngram"]
                    }
                },
                "filter": {
                    "edge_ngram": {
                        "type": "edge_ngram",
                        "min_gram": 1,
                        "max_gram": 20
                    }
                }
            }
        },
        "mappings": {
            "document": {
                "properties": {
                    "title": {
                        "type": "multi_field",
                        "fields": {
                            "analyzed": {"type": "string", "analyzer": "custom_english"},
                            "autocomplete": {"type": "string", "analyzer": "autocomplete"}
                        }
                    }
                }
            }
        }
    }

    # Create index with settings and mapping
    es.indices.create_index("optimized_index", settings)

    return settings


# Mapping performance optimization strategies
def optimize_mapping_performance():
    """
    Best practices for high-performance mappings.

    Returns:
        Mapper: Mapper populated with the example fields added below
    """
    # 1. Use appropriate field types
    optimized_mapping = Mapper()

    # Use keyword fields for exact matches (faster than analyzed strings)
    optimized_mapping.add_property("status", StringField(index="not_analyzed"))

    # Use appropriate numeric types (don't use long for small numbers)
    optimized_mapping.add_property("count", IntegerField())  # Not LongField

    # Disable indexing for display-only fields
    optimized_mapping.add_property("description", StringField(index="no", store=True))

    # 2. Optimize string field settings
    # Disable norms for fields that don't need scoring
    optimized_mapping.add_property("category", StringField(
        index="not_analyzed",
        omit_norms=True  # Saves space, faster filtering
    ))

    # Use appropriate term vectors (only when needed)
    optimized_mapping.add_property("content", StringField(
        analyzer="english",
        term_vector="with_positions_offsets",  # Only if highlighting needed
        store=False  # Use _source instead of stored fields
    ))

    # 3. Optimize date fields
    optimized_mapping.add_property("timestamp", DateField(
        precision_step=4,  # Better range query performance
        format="epoch_millis"  # Faster parsing
    ))

    # 4. Use doc_values for sorting/aggregation fields
    # (doc_values is not a named StringField parameter; it travels via **kwargs)
    optimized_mapping.add_property("sort_field", StringField(
        index="not_analyzed",
        doc_values=True  # Faster sorting/aggregation
    ))

    return optimized_mapping
# Memory optimization
def optimize_mapping_memory():
    """
    Optimize mapping for memory usage.

    Returns:
        Mapper: Mapper configured with the memory-saving options set below
    """
    memory_mapping = Mapper()

    # Disable _all field if not needed (saves space and indexing time)
    # NOTE(review): this assigns a plain attribute on the Mapper; whether
    # to_dict() serialises `_all` / `_source` is not shown here - confirm.
    memory_mapping._all = {"enabled": False}

    # Use compressed storage for large text fields
    # (compress / compress_threshold are passed through **kwargs)
    memory_mapping.add_property("large_content", StringField(
        analyzer="english",
        compress=True,  # Compress stored content
        compress_threshold="100b"  # Compress if larger than 100 bytes
    ))

    # Disable source for fields not needed in results
    memory_mapping._source = {
        "excludes": ["internal_field", "temp_data"]
    }

    return memory_mapping


# Handle mapping changes and schema evolution
def handle_mapping_evolution():
    """
    Strategies for evolving mappings over time.

    Groups three illustrative helpers. As written, the helpers are only
    defined: none of them is called or returned by this function.

    NOTE(review): the original indentation was lost; nesting the three helpers
    inside this function is inferred from the numbered section comments.
    """

    # 1. Additive changes (safe)
    def add_new_field():
        """Add new field to existing mapping."""
        new_field_mapping = {
            "properties": {
                "new_feature": {"type": "string", "analyzer": "english"}
            }
        }
        es.indices.put_mapping("document", new_field_mapping, indices=["myindex"])

    # 2. Breaking changes (require reindexing)
    def change_field_type():
        """Handle field type changes that require reindexing."""
        # Create new index with updated mapping
        new_mapping = create_updated_mapping()
        es.indices.create_index("myindex_v2")
        es.indices.put_mapping("document", new_mapping.to_dict(), indices=["myindex_v2"])

        # Reindex data (in production, use scroll/bulk for large datasets)
        # This is a simplified example: only the first 1000 hits are requested.
        old_docs = es.search({"query": {"match_all": {}}}, indices=["myindex"], size=1000)
        for doc in old_docs:
            # Transform document if needed
            transformed_doc = transform_document(doc)
            es.index(transformed_doc, "myindex_v2", "document", id=doc._meta.id)

        # Switch aliases so "current" points at the new index
        es.indices.change_aliases([
            {"remove": {"index": "myindex", "alias": "current"}},
            {"add": {"index": "myindex_v2", "alias": "current"}}
        ])

    # 3. Version-aware mapping
    def create_versioned_mapping():
        """Create mapping with version information for tracking."""
        versioned_mapping = Mapper()
        versioned_mapping.add_property("_mapping_version", IntegerField())
        versioned_mapping.add_property("title", StringField(analyzer="english"))
        # ... other fields
        return versioned_mapping
def create_updated_mapping():
    """
    Create updated mapping for schema evolution.

    Returns:
        Mapper: Mapper holding the updated field definitions
    """
    mapping = Mapper()

    # Updated field definitions
    mapping.add_property("title", StringField(analyzer="english"))
    mapping.add_property("content", StringField(analyzer="english"))

    # Changed field type
    mapping.add_property("price", DoubleField())  # Changed from FloatField

    return mapping
def transform_document(doc):
    """
    Transform document during reindexing.

    Args:
        doc: Document object. If it has an ``old_field`` attribute, the value
            is converted into ``new_field`` and ``old_field`` is removed.

    Returns:
        The same ``doc`` object, modified in place when a transformation applied.
    """
    # Example transformations
    if hasattr(doc, 'old_field'):
        # NOTE(review): transform_old_field is not defined in this document;
        # it is assumed to be supplied by the application.
        doc.new_field = transform_old_field(doc.old_field)
        delattr(doc, 'old_field')
    return doc


# PyES mapping management provides comprehensive control over ElasticSearch index schemas, enabling efficient data storage, fast searching, and proper data type handling while supporting schema evolution and performance optimization.
Install with Tessl CLI
npx tessl i tessl/pypi-pyes