CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-pyes

Python Elastic Search driver providing a pythonic interface for interacting with ElasticSearch clusters

Pending
Overview
Eval results
Files

docs/mappings.md

PyES Mappings and Schema Management

Overview

PyES provides comprehensive mapping management for defining ElasticSearch index schemas. Mappings define how documents and their fields are stored and indexed, including field types, analyzers, and indexing options. Proper mapping design is crucial for search performance, data integrity, and storage efficiency.

Core Mapping Classes

Mapper

class Mapper:
    """
    Entry point for managing the mappings of ElasticSearch indices.

    Covers document type lookup, registration of field definitions and
    conversion of the schema to a dictionary.
    """

    def __init__(self):
        """Create a new Mapper."""

    def get_doctype(self, name):
        """
        Look up the mapping of a document type.

        Args:
            name (str): Name of the document type

        Returns:
            DocumentObjectField: Mapping of that document type
        """

    def to_dict(self):
        """
        Serialise the mapper as a dictionary.

        Returns:
            dict: Mapping dictionary in the form ElasticSearch expects
        """

    def add_property(self, name, field):
        """
        Register a field in the mapping.

        Args:
            name (str): Name of the field
            field (AbstractField): Definition of the field
        """

    def create_index_if_missing(self, index_name):
        """
        Create the index unless it already exists.

        Args:
            index_name (str): Name of the index to create
        """

# Basic mapper usage
from pyes import ES, Mapper, StringField, IntegerField, DateField

# Connection used to send the mapping to the cluster; adjust host:port as needed.
# Without it the `es` name used below is undefined.
es = ES("127.0.0.1:9200")

# Create mapping for blog posts
blog_mapping = Mapper()
blog_mapping.add_property("title", StringField(analyzer="standard"))
blog_mapping.add_property("content", StringField(analyzer="english"))
blog_mapping.add_property("view_count", IntegerField())
blog_mapping.add_property("published_date", DateField())

# Apply the mapping for doc type "blog_post" to the "blog" index
es.indices.put_mapping("blog_post", blog_mapping.to_dict(), indices=["blog"])

Base Field Classes

AbstractField

class AbstractField:
    """
    Common ancestor of every field type.

    Declares the properties and behavior that all fields share.
    """

    def __init__(self, index=None, store=None, boost=None,
                 null_value=None, include_in_all=None, **kwargs):
        """
        Create a base field.

        Args:
            index (str, optional): Index option (analyzed, not_analyzed, no)
            store (bool, optional): Whether the value is stored separately
            boost (float, optional): Boost factor applied when scoring
            null_value (any, optional): Value used in place of null
            include_in_all (bool, optional): Whether the field feeds the _all field
            **kwargs: Further parameters specific to the concrete field
        """

    def as_dict(self):
        """
        Serialise the field as a dictionary.

        Returns:
            dict: Field definition usable inside an ElasticSearch mapping
        """

String and Text Fields

StringField

class StringField(AbstractField):
    """
    Field holding textual content.

    Supports full-text search, analysis, and various string operations.
    """

    def __init__(self, analyzer=None, index_analyzer=None, search_analyzer=None,
                 index=None, store=None, term_vector=None, boost=None,
                 null_value=None, omit_norms=None, omit_term_freq_and_positions=None,
                 include_in_all=None, **kwargs):
        """
        Create a StringField.

        Args:
            analyzer (str, optional): Analyzer used both when indexing and searching
            index_analyzer (str, optional): Analyzer used only when indexing
            search_analyzer (str, optional): Analyzer used only when searching
            index (str, optional): Index option (analyzed, not_analyzed, no)
            store (bool, optional): Whether the original value is stored
            term_vector (str, optional): One of no, yes, with_offsets,
                                        with_positions, with_positions_offsets
            boost (float, optional): Relevance boost of the field
            null_value (str, optional): Value used in place of null
            omit_norms (bool, optional): Skip field-length normalization
            omit_term_freq_and_positions (bool, optional): Skip term frequency/positions
            include_in_all (bool, optional): Whether the field feeds the _all field
            **kwargs: Further string field parameters
        """

# Text field configurations
from pyes import StringField

# Full-text search field analyzed with the built-in "english" analyzer
content_field = StringField(
    analyzer="english",
    term_vector="with_positions_offsets",  # For highlighting
    store=False  # Don't store original (use _source)
)

# Exact-match keyword field
category_field = StringField(
    index="not_analyzed",  # No analysis for exact matching
    store=True,
    boost=1.5
)

# Field with a general analyzer ("standard") plus a different analyzer
# ("english") that is applied to queries only
title_field = StringField(
    analyzer="standard",
    search_analyzer="english",  # Different analyzer for search
    include_in_all=True
)

# Non-indexed field for display only
description_field = StringField(
    index="no",  # Not searchable
    store=True   # But stored for retrieval
)

Numeric Fields

Base Numeric Field

class NumericFieldAbstract(AbstractField):
    """
    Common ancestor of the numeric field types.

    Holds the functionality shared by all numeric fields.
    """

    def __init__(self, precision_step=None, **kwargs):
        """
        Create a numeric field.

        Args:
            precision_step (int, optional): Precision step used for range queries
            **kwargs: Further numeric field parameters
        """

Integer Fields

class IntegerField(NumericFieldAbstract):
    """Signed 32-bit integer field; values span -2^31 to 2^31-1."""

    def __init__(self, **kwargs):
        """
        Create an IntegerField.

        Args:
            **kwargs: Keyword options for the field
        """

class LongField(NumericFieldAbstract):
    """Signed 64-bit integer field; values span -2^63 to 2^63-1."""

    def __init__(self, **kwargs):
        """
        Create a LongField.

        Args:
            **kwargs: Keyword options for the field
        """

class ShortField(NumericFieldAbstract):
    """Signed 16-bit integer field; values span -32,768 to 32,767."""

    def __init__(self, **kwargs):
        """
        Create a ShortField.

        Args:
            **kwargs: Keyword options for the field
        """

class ByteField(NumericFieldAbstract):
    """Signed 8-bit integer field; values span -128 to 127."""

    def __init__(self, **kwargs):
        """
        Create a ByteField.

        Args:
            **kwargs: Keyword options for the field
        """

# Integer field usage: pick the narrowest type that fits the expected range
from pyes import IntegerField, LongField, ShortField, ByteField

# Standard counters and IDs (32-bit)
user_id_field = IntegerField()
view_count_field = IntegerField(null_value=0)  # null is replaced by 0

# Large numbers (timestamps, large counters) need 64 bits
timestamp_field = LongField()
total_bytes_field = LongField()

# Small numbers (status codes, categories) fit in 16 or 8 bits
status_code_field = ShortField()
priority_field = ByteField(null_value=0)  # null is replaced by 0

Floating Point Fields

class FloatField(NumericFieldAbstract):
    """IEEE 754 floating point field, 32 bits wide."""

    def __init__(self, **kwargs):
        """
        Create a FloatField.

        Args:
            **kwargs: Keyword options for the field
        """

class DoubleField(NumericFieldAbstract):
    """IEEE 754 floating point field, 64 bits wide."""

    def __init__(self, **kwargs):
        """
        Create a DoubleField.

        Args:
            **kwargs: Keyword options for the field
        """

# Floating point usage
from pyes import FloatField, DoubleField

# Standard precision (32-bit)
price_field = FloatField(null_value=0.0)  # null is replaced by 0.0
rating_field = FloatField()

# High precision values (64-bit), e.g. coordinates
latitude_field = DoubleField()
longitude_field = DoubleField()
precise_calculation_field = DoubleField()

Specialized Fields

Date Field

class DateField(AbstractField):
    """Field for dates and datetimes; the accepted format is configurable."""

    def __init__(self, format=None, precision_step=None, **kwargs):
        """
        Create a DateField.

        Args:
            format (str, optional): One or more date format patterns
            precision_step (int, optional): Precision step used for range queries
            **kwargs: Further date field parameters
        """

# Date field configurations
from pyes import DateField

# No explicit format: the default (ISO) date format is used
published_date_field = DateField()

# Custom date format
custom_date_field = DateField(format="yyyy-MM-dd HH:mm:ss")

# Multiple date formats: alternatives are separated by "||"
flexible_date_field = DateField(
    format="yyyy-MM-dd||yyyy-MM-dd HH:mm:ss||epoch_millis"
)

# Date with precision step for better range performance
timestamp_field = DateField(
    precision_step=4,  # Better range query performance
    format="epoch_millis"
)

Boolean Field

class BooleanField(AbstractField):
    """Field holding a true/false value."""

    def __init__(self, **kwargs):
        """
        Create a BooleanField.

        Args:
            **kwargs: Further boolean field parameters
        """

# Boolean field usage
from pyes import BooleanField

# Simple boolean flags; null_value supplies the value used when the field is null
is_published_field = BooleanField(null_value=False)
featured_field = BooleanField()
is_active_field = BooleanField(null_value=True)

Binary Field

class BinaryField(AbstractField):
    """Field that stores binary data in base64-encoded form."""

    def __init__(self, **kwargs):
        """
        Create a BinaryField.

        Args:
            **kwargs: Further binary field parameters
        """

# Binary data storage (values are base64-encoded binary data)
from pyes import BinaryField

# File attachments
file_content_field = BinaryField(store=True)  # value is stored separately
thumbnail_field = BinaryField()
encrypted_data_field = BinaryField()

IP Address Field

class IpField(AbstractField):
    """Field holding an IPv4 address."""

    def __init__(self, **kwargs):
        """
        Create an IpField.

        Args:
            **kwargs: Further IP field parameters
        """

# IP address tracking (IpField holds IPv4 addresses)
from pyes import IpField

# Network addresses
client_ip_field = IpField()
server_ip_field = IpField()
proxy_ip_field = IpField()

Geospatial Fields

Geo Point Field

class GeoPointField(AbstractField):
    """Field holding a geographic point as latitude/longitude coordinates."""

    def __init__(self, lat_lon=None, geohash=None, geohash_precision=None, **kwargs):
        """
        Create a GeoPointField.

        Args:
            lat_lon (bool, optional): Turn on the lat/lon format
            geohash (bool, optional): Turn on the geohash format
            geohash_precision (int, optional): Precision level of the geohash
            **kwargs: Further geo point parameters
        """

# Geographic location fields
from pyes import GeoPointField

# Basic location tracking with all options left at their defaults
location_field = GeoPointField()

# Location with geohash support for proximity searches
restaurant_location_field = GeoPointField(
    lat_lon=True,
    geohash=True,
    geohash_precision=12
)

# Event location with only the lat/lon format enabled
event_coordinates_field = GeoPointField(lat_lon=True)

Complex Field Types

Multi Field

class MultiField(AbstractField):
    """
    Mapping that analyzes one piece of content in several ways.

    The same field can be indexed multiple times, each time with a
    different analyzer.
    """

    def __init__(self, name, type=None, path="just_name", fields=None, **kwargs):
        """
        Create a MultiField.

        Args:
            name (str): Name of the field
            type (str, optional): Type of the main field
            path (str): Path type for field names. Default: "just_name"
            fields (dict, optional): Definitions of the sub-fields
            **kwargs: Further multi-field parameters
        """

# Multi-field for different analysis approaches
from pyes import MultiField, StringField

# Title field with analyzed, exact and suggestion-oriented versions
title_multifield = MultiField("title", type="string", fields={
    "analyzed": StringField(analyzer="english"),
    "exact": StringField(index="not_analyzed"),
    "suggest": StringField(analyzer="simple")
})

# Name field with different analyzers
# NOTE: "phonetic_analyzer" is not defined in this document; presumably it is a
# custom analyzer that must exist in the index settings — confirm before use.
name_multifield = MultiField("name", type="string", fields={
    "standard": StringField(analyzer="standard"),
    "keyword": StringField(index="not_analyzed"),
    "phonetic": StringField(analyzer="phonetic_analyzer")
})

Object Field

class ObjectField(AbstractField):
    """Field for a nested JSON object that carries its own properties."""

    def __init__(self, properties=None, dynamic=None, enabled=None,
                 include_in_all=None, **kwargs):
        """
        Create an ObjectField.

        Args:
            properties (dict, optional): Definitions of the object's properties
            dynamic (bool|str, optional): Dynamic mapping behavior
            enabled (bool, optional): Turn indexing of the object on or off
            include_in_all (bool, optional): Whether the object feeds the _all field
            **kwargs: Further object field parameters
        """

# Nested object structures
# NOTE: StringField must already be imported (e.g. `from pyes import StringField`).
from pyes import ObjectField

# Address object with properties
address_field = ObjectField(properties={
    "street": StringField(),
    "city": StringField(index="not_analyzed"),
    "state": StringField(index="not_analyzed"),
    "zip_code": StringField(index="not_analyzed"),
    "country": StringField(index="not_analyzed")
})

# User profile object
profile_field = ObjectField(
    dynamic=True,  # Allow new properties
    properties={
        "display_name": StringField(analyzer="standard"),
        "bio": StringField(analyzer="english"),
        "avatar_url": StringField(index="no"),
        "social_links": ObjectField(enabled=False)  # Store but don't index
    }
)

Nested Object Field

class NestedObject(AbstractField):
    """
    Nested object field that keeps object relationships intact.

    In contrast to ObjectField, the properties that belong to one nested
    object stay related to each other.
    """

    def __init__(self, properties=None, dynamic=None, include_in_all=None, **kwargs):
        """
        Create a NestedObject.

        Args:
            properties (dict, optional): Definitions of the nested object's properties
            dynamic (bool|str, optional): Dynamic mapping behavior
            include_in_all (bool, optional): Whether the object feeds the _all field
            **kwargs: Further nested object parameters
        """

# Nested objects with preserved relationships
# NOTE: StringField, FloatField, IntegerField, BooleanField and DateField must
# already be imported from pyes for this snippet to run.
from pyes import NestedObject

# Product variants as nested objects
variants_field = NestedObject(properties={
    "sku": StringField(index="not_analyzed"),
    "color": StringField(index="not_analyzed"),
    "size": StringField(index="not_analyzed"),
    "price": FloatField(),
    "stock_quantity": IntegerField(),
    "is_available": BooleanField()
})

# Comment threads as nested objects
comments_field = NestedObject(properties={
    "author": StringField(index="not_analyzed"),
    "content": StringField(analyzer="english"),
    "timestamp": DateField(),
    "rating": IntegerField(),
    "is_approved": BooleanField()
})

Document Object Field

class DocumentObjectField:
    """
    Mapping definition at document level.

    Stands for the top-level mapping of one document type.
    """

    def __init__(self, name=None, **kwargs):
        """
        Create a DocumentObjectField.

        Args:
            name (str, optional): Name of the document type
            **kwargs: Mapping parameters at document level
        """

    def add_property(self, name, field):
        """
        Register a property in the document mapping.

        Args:
            name (str): Name of the property
            field (AbstractField): Definition of the field
        """

# Complete document mapping
# NOTE: the field classes used below (StringField, DateField, IntegerField,
# FloatField, BooleanField, GeoPointField) must already be imported from pyes.
from pyes import DocumentObjectField

# Blog post document mapping for the "blog_post" document type
blog_post_mapping = DocumentObjectField("blog_post")
# Analyzed text; the title gets a 2.0 boost
blog_post_mapping.add_property("title", StringField(analyzer="english", boost=2.0))
blog_post_mapping.add_property("content", StringField(analyzer="english"))
blog_post_mapping.add_property("summary", StringField(analyzer="english"))
# Exact-match (not analyzed) strings
blog_post_mapping.add_property("author", StringField(index="not_analyzed"))
blog_post_mapping.add_property("category", StringField(index="not_analyzed"))
blog_post_mapping.add_property("tags", StringField(index="not_analyzed"))
# Dates, numbers, flags and location
blog_post_mapping.add_property("published_date", DateField())
blog_post_mapping.add_property("view_count", IntegerField(null_value=0))
blog_post_mapping.add_property("rating", FloatField())
blog_post_mapping.add_property("is_featured", BooleanField(null_value=False))
blog_post_mapping.add_property("location", GeoPointField())

Attachment Field

class AttachmentField(AbstractField):
    """
    Field that extracts and indexes the content of attached files.

    The mapper-attachments plugin for ElasticSearch is required.
    """

    def __init__(self, **kwargs):
        """
        Create an AttachmentField.

        Args:
            **kwargs: Further attachment field parameters
        """

# File attachment indexing (requires the mapper-attachments plugin)
from pyes import AttachmentField

# Document attachment with extracted content
file_attachment_field = AttachmentField()

# The attachment field is expected to extract the following
# (NOTE(review): list not verifiable from this document — confirm against the
# mapper-attachments plugin documentation):
# - content: Extracted text content
# - title: Document title
# - author: Document author
# - keywords: Document keywords
# - date: Document creation date
# - content_type: File MIME type
# - content_length: File size
# - language: Detected language

Complete Mapping Examples

E-commerce Product Mapping

# Comprehensive e-commerce product mapping
from pyes import (Mapper, StringField, IntegerField, FloatField, BooleanField,
                  DateField, MultiField, NestedObject, ObjectField)

def create_product_mapping():
    """
    Build the mapping for an e-commerce product document.

    Returns:
        Mapper: Mapper populated with every product property
    """

    def keyword(**options):
        # String that is not analyzed, for exact matching.
        return StringField(index="not_analyzed", **options)

    def english():
        return StringField(analyzer="english")

    def unindexed():
        return StringField(index="no")

    product = Mapper()
    add = product.add_property

    # --- Basic product information ---
    add("name", MultiField("name", type="string", fields={
        "analyzed": StringField(analyzer="english", boost=2.0),
        "exact": keyword(),
        "suggest": StringField(analyzer="simple"),
    }))
    add("description", StringField(
        analyzer="english",
        term_vector="with_positions_offsets",
    ))
    add("sku", keyword())
    add("brand", keyword(boost=1.5))
    add("category", keyword())

    # --- Pricing and inventory ---
    for amount in ("price", "sale_price", "cost"):
        add(amount, FloatField())
    add("stock_quantity", IntegerField(null_value=0))
    add("is_in_stock", BooleanField())
    add("low_stock_threshold", IntegerField())

    # --- Product status: (flag name, value used in place of null) ---
    for flag, fallback in (("is_active", True),
                           ("is_featured", False),
                           ("is_on_sale", False)):
        add(flag, BooleanField(null_value=fallback))

    # --- Dates ---
    for moment in ("created_date", "updated_date", "launch_date", "discontinue_date"):
        add(moment, DateField())

    # --- Ratings and reviews ---
    add("average_rating", FloatField())
    for counter in ("review_count", "total_sales"):
        add(counter, IntegerField(null_value=0))

    # --- Product variants as nested objects ---
    add("variants", NestedObject(properties={
        "sku": keyword(),
        "color": keyword(),
        "size": keyword(),
        "material": keyword(),
        "price": FloatField(),
        "stock_quantity": IntegerField(),
        "is_available": BooleanField(),
        "weight": FloatField(),
        "dimensions": ObjectField(properties={
            "length": FloatField(),
            "width": FloatField(),
            "height": FloatField(),
        }),
    }))

    # --- Product attributes: dynamic object, arbitrary attributes allowed ---
    add("attributes", ObjectField(
        dynamic=True,
        properties={
            "color": keyword(),
            "size": keyword(),
            "material": keyword(),
            "style": keyword(),
        },
    ))

    # --- SEO fields ---
    add("meta_title", english())
    add("meta_description", english())
    add("keywords", StringField(analyzer="keyword"))
    add("url_slug", keyword())

    # --- Images ---
    for image in ("primary_image", "gallery_images"):
        add(image, unindexed())

    return product

# Apply product mapping: register it for doc type "product" on the "catalog" index.
# NOTE: assumes `es` is an already-connected pyes ES client created elsewhere.
product_mapping = create_product_mapping()
es.indices.put_mapping("product", product_mapping.to_dict(), indices=["catalog"])

User Profile Mapping

# User profile and account mapping
def create_user_mapping():
    """
    Build the mapping for a user profile and account document.

    Returns:
        Mapper: Mapper populated with every user property
    """

    def keyword():
        # String that is not analyzed, for exact matching.
        return StringField(index="not_analyzed")

    def standard():
        return StringField(analyzer="standard")

    def unindexed():
        return StringField(index="no")

    user = Mapper()
    add = user.add_property

    # --- Basic user information ---
    for identity in ("username", "email"):
        add(identity, keyword())
    for name_part in ("first_name", "last_name"):
        add(name_part, standard())

    # --- Full name with multi-field analysis ---
    add("full_name", MultiField("full_name", type="string", fields={
        "analyzed": standard(),
        "exact": keyword(),
        "phonetic": StringField(analyzer="phonetic"),  # Custom analyzer needed
    }))

    # --- Profile information ---
    add("bio", StringField(analyzer="english"))
    add("title", standard())
    for employer in ("company", "department"):
        add(employer, keyword())

    # --- Contact information ---
    for number in ("phone", "mobile"):
        add(number, keyword())
    add("website", unindexed())

    # --- Addresses as nested objects ---
    add("addresses", NestedObject(properties={
        "type": keyword(),  # home, work, billing
        "street": StringField(),
        "city": keyword(),
        "state": keyword(),
        "postal_code": keyword(),
        "country": keyword(),
        "is_primary": BooleanField(),
    }))

    # --- User status: (flag name, value used in place of null) ---
    for flag, fallback in (("is_active", True),
                           ("is_verified", False),
                           ("is_premium", False)):
        add(flag, BooleanField(null_value=fallback))
    add("account_type", keyword())

    # --- Dates and timestamps ---
    for moment in ("created_date", "last_login", "last_updated", "birth_date"):
        add(moment, DateField())

    # --- Preferences and settings ---
    add("preferences", ObjectField(
        dynamic=True,
        properties={
            "language": keyword(),
            "timezone": keyword(),
            "notifications": ObjectField(enabled=False),  # Store but don't index
            "privacy_settings": ObjectField(enabled=False),
        },
    ))

    # --- Social and interests ---
    for topic in ("interests", "skills"):
        add(topic, keyword())
    add("social_links", ObjectField(
        properties={
            "linkedin": unindexed(),
            "twitter": unindexed(),
            "github": unindexed(),
        },
    ))

    # --- Activity metrics ---
    for metric in ("login_count", "post_count", "reputation_score"):
        add(metric, IntegerField(null_value=0))

    return user

# Apply user mapping: register it for doc type "user" on the "users" index.
# NOTE: assumes `es` is an already-connected pyes ES client created elsewhere.
user_mapping = create_user_mapping()
es.indices.put_mapping("user", user_mapping.to_dict(), indices=["users"])

Content Management Mapping

# CMS content mapping with rich media support
def create_content_mapping():
    """
    Build the mapping for a CMS content document with media attachments.

    Returns:
        Mapper: Mapper populated with every content property
    """

    def keyword():
        # String that is not analyzed, for exact matching.
        return StringField(index="not_analyzed")

    def english():
        return StringField(analyzer="english")

    def standard():
        return StringField(analyzer="standard")

    def unindexed():
        return StringField(index="no")

    content = Mapper()
    add = content.add_property

    # --- Content identification ---
    add("title", MultiField("title", type="string", fields={
        "analyzed": StringField(analyzer="english", boost=3.0),
        "exact": keyword(),
        "suggest": StringField(analyzer="simple"),
    }))
    for identifier in ("slug", "content_id"):
        add(identifier, keyword())

    # --- Content body with rich analysis ---
    add("content", StringField(
        analyzer="english",
        term_vector="with_positions_offsets",  # For highlighting
        store=False,  # Use _source instead
    ))
    for digest in ("excerpt", "summary"):
        add(digest, english())

    # --- Content metadata ("format" is html, markdown, etc.) ---
    for descriptor in ("content_type", "format", "language"):
        add(descriptor, keyword())
    # reading_time is expressed in minutes
    for measure in ("word_count", "reading_time"):
        add(measure, IntegerField())

    # --- Authoring information ---
    add("author", ObjectField(properties={
        "id": keyword(),
        "name": standard(),
        "email": keyword(),
        "bio": english(),
    }))
    add("contributors", NestedObject(properties={
        "id": keyword(),
        "name": standard(),
        "role": keyword(),  # editor, reviewer, etc.
    }))

    # --- Publication workflow ---
    for state in ("status", "workflow_state"):
        add(state, keyword())
    add("is_published", BooleanField())
    for flag in ("is_featured", "is_premium"):
        add(flag, BooleanField(null_value=False))

    # --- Dates and scheduling ---
    for moment in ("created_date", "updated_date", "published_date",
                   "scheduled_date", "expiry_date"):
        add(moment, DateField())

    # --- Categorization and tagging ---
    for facet in ("category", "subcategory", "tags", "topics"):
        add(facet, keyword())

    # --- SEO and social ---
    add("seo", ObjectField(properties={
        "meta_title": english(),
        "meta_description": english(),
        "keywords": StringField(analyzer="keyword"),
        "canonical_url": unindexed(),
        "og_title": english(),
        "og_description": english(),
        "og_image": unindexed(),
    }))

    # --- Media attachments ---
    add("media", NestedObject(properties={
        "type": keyword(),  # image, video, audio, document
        "url": unindexed(),
        "title": standard(),
        "alt_text": english(),
        "caption": english(),
        "file_size": IntegerField(),
        "mime_type": keyword(),
        "dimensions": ObjectField(properties={
            "width": IntegerField(),
            "height": IntegerField(),
        }),
    }))

    # --- Engagement metrics ---
    for metric in ("view_count", "like_count", "share_count", "comment_count"):
        add(metric, IntegerField(null_value=0))
    add("average_rating", FloatField())

    # --- Content relationships ---
    for relation in ("related_content", "parent_content", "series_id"):
        add(relation, keyword())

    return content

# Apply content mapping: register it for doc type "content" on the "cms" index.
# NOTE: assumes `es` is an already-connected pyes ES client created elsewhere.
content_mapping = create_content_mapping()
es.indices.put_mapping("content", content_mapping.to_dict(), indices=["cms"])

Mapping Management Operations

Dynamic Mapping Control

# Control dynamic mapping behavior
def configure_dynamic_mapping():
    """
    Show strict versus template-driven dynamic mapping and apply the latter.

    The template-driven mapping is put on the "flexible" index for document
    type "flexible_doc" through the module-level `es` client.

    Returns:
        dict: The dynamic mapping that was applied
    """

    # Strict variant (no new fields allowed). Built for comparison only:
    # it is neither applied nor returned.
    english_text = {"type": "string", "analyzer": "english"}
    strict_mapping = {
        "dynamic": "strict",
        "properties": {
            "title": dict(english_text),
            "content": dict(english_text),
        },
    }

    # (template name, field-name pattern, mapping given to matching fields)
    template_specs = (
        ("strings_as_keywords", "*_keyword",
         {"type": "string", "index": "not_analyzed"}),
        ("strings_as_text", "*_text",
         {"type": "string", "analyzer": "english"}),
        ("dates", "*_date",
         {"type": "date", "format": "yyyy-MM-dd||epoch_millis"}),
    )

    # Dynamic variant: new fields are accepted and shaped by the templates.
    dynamic_mapping = {
        "dynamic": True,
        "dynamic_templates": [
            {label: {"match": pattern, "mapping": field_mapping}}
            for label, pattern, field_mapping in template_specs
        ],
        # Explicit field definitions
        "properties": {
            "id": {"type": "string", "index": "not_analyzed"},
        },
    }

    es.indices.put_mapping("flexible_doc", dynamic_mapping, indices=["flexible"])

    return dynamic_mapping

# Index settings for mapping optimization
def create_optimized_index_settings():
    """Create "optimized_index" with custom analyzers and a multi-field title.

    The request body combines index-level settings (shard/replica counts and
    an analysis section) with a mapping for the "document" type whose
    "title" field is indexed both with the custom English analyzer and with
    the autocomplete analyzer.

    Returns:
        dict: The settings/mappings body that was passed to create_index.
    """

    settings = {
        "settings": {
            "number_of_shards": 5,
            "number_of_replicas": 1,
            "analysis": {
                "analyzer": {
                    "custom_english": {
                        "type": "english",
                        "stopwords": ["the", "and", "or", "but"]
                    },
                    "autocomplete": {
                        "type": "custom",
                        "tokenizer": "keyword",
                        # The key must be "filter" (singular); a "filters"
                        # key is not a recognized custom-analyzer setting,
                        # so the token filters would not be applied.
                        "filter": ["lowercase", "edge_ngram"]
                    }
                },
                "filter": {
                    # Custom token filter referenced by the autocomplete
                    # analyzer above
                    "edge_ngram": {
                        "type": "edge_ngram",
                        "min_gram": 1,
                        "max_gram": 20
                    }
                }
            }
        },
        "mappings": {
            "document": {
                "properties": {
                    "title": {
                        "type": "multi_field",
                        "fields": {
                            "analyzed": {"type": "string", "analyzer": "custom_english"},
                            "autocomplete": {"type": "string", "analyzer": "autocomplete"}
                        }
                    }
                }
            }
        }
    }

    # Create index with settings and mapping
    es.indices.create_index("optimized_index", settings)

    return settings

Best Practices

Performance Optimization

# Mapping performance optimization strategies
def optimize_mapping_performance():
    """Build a Mapper that demonstrates performance-oriented field choices.

    Returns:
        Mapper: Mapping with the status, count, description, category,
        content, timestamp and sort_field properties registered.
    """

    optimized_mapping = Mapper()
    register = optimized_mapping.add_property

    # --- 1. Pick the cheapest field type that fits ---

    # Exact-match value: index as-is, without analysis
    status_field = StringField(index="not_analyzed")
    register("status", status_field)

    # Small numbers: IntegerField rather than LongField
    count_field = IntegerField()
    register("count", count_field)

    # Display-only text: stored but not indexed
    description_field = StringField(index="no", store=True)
    register("description", description_field)

    # --- 2. Trim string field options ---

    # No scoring needed on this field, so norms are omitted
    category_field = StringField(index="not_analyzed", omit_norms=True)
    register("category", category_field)

    # Term vectors with positions/offsets are only worth enabling when
    # highlighting is needed; content is read from _source, not stored.
    content_field = StringField(
        analyzer="english",
        term_vector="with_positions_offsets",
        store=False,
    )
    register("content", content_field)

    # --- 3. Date field tuning ---

    # precision_step is aimed at range queries, epoch_millis at parsing speed
    timestamp_field = DateField(precision_step=4, format="epoch_millis")
    register("timestamp", timestamp_field)

    # --- 4. doc_values for sorting/aggregation ---

    sort_field = StringField(index="not_analyzed", doc_values=True)
    register("sort_field", sort_field)

    return optimized_mapping

# Memory optimization
def optimize_mapping_memory():
    """Build a Mapper configured to reduce storage and memory usage.

    Returns:
        Mapper: Mapping with _all disabled, a compressed "large_content"
        field, and two fields excluded from _source.
    """

    lean_mapping = Mapper()

    # Turn off the _all field when it is not needed
    # (saves space and indexing time)
    lean_mapping._all = {"enabled": False}

    # Large text field: compress content once it exceeds 100 bytes
    large_content_field = StringField(
        analyzer="english",
        compress=True,
        compress_threshold="100b",
    )
    lean_mapping.add_property("large_content", large_content_field)

    # Keep fields that are not needed in results out of _source
    excluded_from_source = ["internal_field", "temp_data"]
    lean_mapping._source = {"excludes": excluded_from_source}

    return lean_mapping

Schema Evolution

# Handle mapping changes and schema evolution
def handle_mapping_evolution():
    """Illustrate strategies for evolving mappings over time.

    Each nested helper demonstrates one strategy. The helpers are only
    defined here, not invoked, so calling this function performs no cluster
    operations and returns None.
    """

    # 1. Additive changes (safe)
    def add_new_field():
        """Add new field to existing mapping."""
        new_field_mapping = {
            "properties": {
                "new_feature": {"type": "string", "analyzer": "english"}
            }
        }
        es.indices.put_mapping("document", new_field_mapping, indices=["myindex"])

    # 2. Breaking changes (require reindexing)
    def change_field_type():
        """Handle field type changes that require reindexing."""

        # Create new index with updated mapping
        new_mapping = create_updated_mapping()
        es.indices.create_index("myindex_v2")
        es.indices.put_mapping("document", new_mapping.to_dict(), indices=["myindex_v2"])

        # Reindex data (in production, use scroll/bulk for large datasets)
        # This is a simplified example: only one page of up to 1000 hits
        # is requested from the old index.
        old_docs = es.search({"query": {"match_all": {}}}, indices=["myindex"], size=1000)

        for doc in old_docs:
            # Transform document if needed
            transformed_doc = transform_document(doc)
            # Reuse the original document id in the new index
            es.index(transformed_doc, "myindex_v2", "document", id=doc._meta.id)

        # Switch aliases. pyes' change_aliases takes (command, index, alias)
        # 3-tuples and builds the "_aliases" actions body itself, so raw
        # {"remove": {...}} action dicts must not be passed here.
        es.indices.change_aliases([
            ("remove", "myindex", "current"),
            ("add", "myindex_v2", "current"),
        ])

    # 3. Version-aware mapping
    def create_versioned_mapping():
        """Create mapping with version information for tracking."""

        versioned_mapping = Mapper()
        versioned_mapping.add_property("_mapping_version", IntegerField())
        versioned_mapping.add_property("title", StringField(analyzer="english"))
        # ... other fields

        return versioned_mapping

def create_updated_mapping():
    """Build the revised mapping used when reindexing into a new index.

    Returns:
        Mapper: Mapping with English-analyzed "title" and "content" fields
        and a "price" field declared as DoubleField.
    """

    updated = Mapper()

    # Text fields keep the English analyzer
    for text_field_name in ("title", "content"):
        updated.add_property(text_field_name, StringField(analyzer="english"))

    # The field whose type changed (was FloatField)
    updated.add_property("price", DoubleField())

    return updated

def transform_document(doc):
    """Migrate a document's legacy attribute while reindexing.

    Args:
        doc: Document object; modified in place when it has ``old_field``.

    Returns:
        The same ``doc`` object that was passed in.
    """

    # Nothing to migrate: hand the document back untouched
    if not hasattr(doc, 'old_field'):
        return doc

    # Derive the replacement value, then drop the legacy attribute
    doc.new_field = transform_old_field(doc.old_field)
    del doc.old_field

    return doc

PyES mapping management provides comprehensive control over ElasticSearch index schemas, enabling efficient data storage, fast searching, and proper data type handling while supporting schema evolution and performance optimization.

Install with Tessl CLI

npx tessl i tessl/pypi-pyes

docs

bulk-operations.md

client.md

facets-aggregations.md

filters.md

index.md

mappings.md

query-dsl.md

rivers.md

tile.json