Python ElasticSearch driver providing a Pythonic interface for interacting with ElasticSearch clusters.
—
The PyES Filter DSL provides fast, non-scored document filtering capabilities. Filters are executed in filter context, which means they don't affect document scores and are cached by ElasticSearch for better performance. Use filters when you need fast yes/no matching without relevance scoring.
class Filter:
    """
    Base class for all ElasticSearch filters.

    All filter classes inherit from this base class and implement
    the serialize() method to convert to ElasticSearch JSON.
    """

    def serialize(self):
        """
        Convert filter to ElasticSearch JSON format.

        Returns:
            dict: ElasticSearch filter JSON
        """
        pass


class FilterList:
    """
    Container for multiple filters that can be combined.
    """

    def __init__(self, filters=None):
        """
        Initialize FilterList.

        Args:
            filters (list, optional): Initial list of filters
        """
        pass

    def add(self, filter):
        """
        Add filter to the list.

        Args:
            filter (Filter): Filter to add
        """
        pass


class BoolFilter(Filter):
    """
    Boolean combination of filters with must/must_not/should clauses.

    Provides full boolean logic for complex filtering requirements.
    """

    def __init__(self, must=None, must_not=None, should=None, **kwargs):
        """
        Initialize BoolFilter.

        Args:
            must (Filter|list): Filters that must match (AND logic)
            must_not (Filter|list): Filters that must not match (NOT logic)
            should (Filter|list): Filters that should match (OR logic)
            **kwargs: Additional boolean filter parameters
        """
        pass
# Complex boolean filtering
from pyes import BoolFilter, TermFilter, RangeFilter, ExistsFilter

# Documents that must be published, must not be drafts,
# and should be either featured or have high views
bool_filter = BoolFilter(
    must=[
        TermFilter("status", "published"),
        ExistsFilter("content"),
    ],
    must_not=[
        TermFilter("category", "draft"),
        TermFilter("author", "spam_user"),
    ],
    should=[
        TermFilter("featured", True),
        RangeFilter("view_count", gte=1000),
    ],
)

# Apply filter to search
from pyes import Search, MatchAllQuery

search = Search(MatchAllQuery()).filter(bool_filter)
# NOTE(review): `es` is not defined in this snippet; presumably an existing
# pyes connection object -- confirm.
results = es.search(search, indices=["blog"])


class ANDFilter(Filter):
    """
    Logical AND combination of multiple filters.

    All filters must match for document to pass.
    """

    def __init__(self, filters):
        """
        Initialize ANDFilter.

        Args:
            filters (list): List of filters to combine with AND logic
        """
        pass
# AND filter combination
from pyes import ANDFilter, TermFilter, RangeFilter

# Must match all conditions
and_filter = ANDFilter([
    TermFilter("status", "published"),
    TermFilter("category", "tutorial"),
    RangeFilter("published_date", gte="2023-01-01"),
])


class ORFilter(Filter):
    """
    Logical OR combination of multiple filters.

    At least one filter must match for document to pass.
    """

    def __init__(self, filters):
        """
        Initialize ORFilter.

        Args:
            filters (list): List of filters to combine with OR logic
        """
        pass
# OR filter combination
from pyes import ORFilter, TermFilter

# Match any of these categories
category_filter = ORFilter([
    TermFilter("category", "tutorial"),
    TermFilter("category", "guide"),
    TermFilter("category", "reference"),
])


class NotFilter(Filter):
    """
    Logical negation of a filter.

    Documents that do NOT match the wrapped filter pass.
    """

    def __init__(self, filter):
        """
        Initialize NotFilter.

        Args:
            filter (Filter): Filter to negate
        """
        pass
# NOT filter negation
from pyes import NotFilter, TermFilter

# Exclude draft documents
not_draft_filter = NotFilter(TermFilter("status", "draft"))


class TermFilter(Filter):
    """
    Filter for exact term matching (not analyzed).

    Use for keyword fields, IDs, status values, and exact matches.
    """

    def __init__(self, field, value, **kwargs):
        """
        Initialize TermFilter.

        Args:
            field (str): Field name
            value (str|int|float|bool): Exact value to match
            **kwargs: Additional term filter parameters
        """
        pass
# Exact term filtering
from pyes import TermFilter

# Filter by exact status
status_filter = TermFilter("status", "published")

# Filter by numeric value
view_filter = TermFilter("view_count", 1000)

# Filter by boolean value
featured_filter = TermFilter("featured", True)

# Filter by keyword field
category_filter = TermFilter("category.keyword", "Python Tutorial")


class TermsFilter(Filter):
    """
    Filter for matching any of multiple exact terms.

    Equivalent to multiple TermFilter with OR logic.
    """

    def __init__(self, field, values, **kwargs):
        """
        Initialize TermsFilter.

        Args:
            field (str): Field name
            values (list): List of exact values to match
            **kwargs: Additional terms filter parameters
        """
        pass
# Multiple value filtering
from pyes import TermsFilter

# Match multiple categories
categories_filter = TermsFilter("category", ["tutorial", "guide", "reference"])

# Match multiple authors
authors_filter = TermsFilter("author.keyword", ["john_doe", "jane_smith", "bob_wilson"])

# Match multiple IDs
ids_filter = TermsFilter("_id", ["doc1", "doc2", "doc3"])


class PrefixFilter(Filter):
    """
    Filter documents with terms that start with specified prefix.
    """

    def __init__(self, field, prefix, **kwargs):
        """
        Initialize PrefixFilter.

        Args:
            field (str): Field name
            prefix (str): Prefix to match
            **kwargs: Additional prefix filter parameters
        """
        pass
# Prefix matching
from pyes import PrefixFilter

# Filter titles starting with "Python"
title_prefix_filter = PrefixFilter("title.keyword", "Python")

# Filter tags starting with "elastic"
tag_prefix_filter = PrefixFilter("tags", "elastic")

# Filter file paths starting with "/home/user"
path_prefix_filter = PrefixFilter("file_path", "/home/user")


class RegexTermFilter(Filter):
    """
    Filter using regular expression pattern matching.
    """

    def __init__(self, field, regex, **kwargs):
        """
        Initialize RegexTermFilter.

        Args:
            field (str): Field name
            regex (str): Regular expression pattern
            **kwargs: Additional regex filter parameters
        """
        pass
# Regular expression filtering
from pyes import RegexTermFilter

# Filter email addresses
email_filter = RegexTermFilter("email", r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")

# Filter version numbers
version_filter = RegexTermFilter("version", r"[0-9]+\.[0-9]+\.[0-9]+")

# Filter phone numbers
phone_filter = RegexTermFilter("phone", r"\d{3}-\d{3}-\d{4}")


class ExistsFilter(Filter):
    """
    Filter documents where specified field exists and has a value.
    """

    def __init__(self, field, **kwargs):
        """
        Initialize ExistsFilter.

        Args:
            field (str): Field name to check for existence
            **kwargs: Additional exists filter parameters
        """
        pass
# Field existence filtering
from pyes import ExistsFilter

# Documents with email field
has_email_filter = ExistsFilter("email")

# Documents with thumbnail image
has_thumbnail_filter = ExistsFilter("thumbnail_url")

# Documents with geolocation
has_location_filter = ExistsFilter("location")


class MissingFilter(Filter):
    """
    Filter documents where specified field is missing or null.
    """

    def __init__(self, field, **kwargs):
        """
        Initialize MissingFilter.

        Args:
            field (str): Field name to check for absence
            **kwargs: Additional missing filter parameters
        """
        pass
# Field absence filtering
from pyes import MissingFilter

# Documents without email
no_email_filter = MissingFilter("email")

# Documents without tags
no_tags_filter = MissingFilter("tags")

# Documents without expiration date
no_expiry_filter = MissingFilter("expires_at")


class RangeFilter(Filter):
    """
    Filter for range-based matching (numeric, date, or string ranges).

    More efficient than RangeQuery when scoring is not needed.
    """

    def __init__(self, field, from_value=None, to_value=None,
                 include_lower=True, include_upper=True, **kwargs):
        """
        Initialize RangeFilter.

        Args:
            field (str): Field name
            from_value: Lower bound value (use gte/gt kwargs instead)
            to_value: Upper bound value (use lte/lt kwargs instead)
            include_lower (bool): Include lower bound. Default: True
            include_upper (bool): Include upper bound. Default: True
            **kwargs: Range parameters (gte, gt, lte, lt)
        """
        pass
# Range filtering
from pyes import RangeFilter

# Date range filtering
date_filter = RangeFilter("published_date", gte="2023-01-01", lt="2024-01-01")

# Numeric range filtering
view_count_filter = RangeFilter("view_count", gte=100, lte=10000)

# Price range filtering
price_filter = RangeFilter("price", gte=10.00, lt=100.00)

# Age range filtering (exclusive bounds)
age_filter = RangeFilter("age", gt=18, lt=65)

# Score range filtering
score_filter = RangeFilter("rating", gte=4.0)


class LimitFilter(Filter):
    """
    Limit the number of documents processed by subsequent filters.

    Useful for performance optimization with large datasets.
    """

    def __init__(self, value, **kwargs):
        """
        Initialize LimitFilter.

        Args:
            value (int): Maximum number of documents to process
            **kwargs: Additional limit filter parameters
        """
        pass
# Limit processing for performance
from pyes import LimitFilter

# Only process first 1000 documents
limit_filter = LimitFilter(1000)


class GeoDistanceFilter(Filter):
    """
    Filter documents within specified distance from a point.
    """

    def __init__(self, field=None, location=None, distance=None, **kwargs):
        """
        Initialize GeoDistanceFilter.

        Args:
            field (str): Geo-point field name (can be specified in kwargs as field name)
            location (dict): Point location {"lat": lat, "lon": lon}
            distance (str): Distance with unit (e.g., "5km", "10mi")
            **kwargs: Can specify location as field_name=location
        """
        pass
# Geographic distance filtering
from pyes import GeoDistanceFilter

# Within 5km of specific coordinates
geo_filter = GeoDistanceFilter(
    location={"lat": 40.7128, "lon": -74.0060},
    distance="5km",
)

# Alternative syntax with field name
location_filter = GeoDistanceFilter(
    restaurant_location={"lat": 40.7128, "lon": -74.0060},
    distance="2mi",
)

# Within walking distance
walking_filter = GeoDistanceFilter(
    store_location={"lat": 37.7749, "lon": -122.4194},
    distance="500m",
)


class GeoBoundingBoxFilter(Filter):
    """
    Filter documents within a geographic bounding box.
    """

    def __init__(self, field, **kwargs):
        """
        Initialize GeoBoundingBoxFilter.

        Args:
            field (str): Geo-point field name
            **kwargs: Bounding box coordinates (top_left, bottom_right or
                top, left, bottom, right)
        """
        pass
# Bounding box filtering
from pyes import GeoBoundingBoxFilter

# Documents within NYC area
nyc_filter = GeoBoundingBoxFilter(
    "location",
    top_left={"lat": 40.8176, "lon": -74.0431},
    bottom_right={"lat": 40.4774, "lon": -73.7004},
)

# Alternative coordinate specification
area_filter = GeoBoundingBoxFilter(
    "coordinates",
    top=40.8176,
    left=-74.0431,
    bottom=40.4774,
    right=-73.7004,
)


class GeoPolygonFilter(Filter):
    """
    Filter documents within a geographic polygon.
    """

    def __init__(self, field, points, **kwargs):
        """
        Initialize GeoPolygonFilter.

        Args:
            field (str): Geo-point field name
            points (list): List of polygon vertices as {"lat": lat, "lon": lon}
            **kwargs: Additional geo polygon parameters
        """
        pass
# Polygon area filtering
from pyes import GeoPolygonFilter

# Custom polygon area
polygon_filter = GeoPolygonFilter(
    "location",
    points=[
        {"lat": 40.7589, "lon": -73.9851},  # Times Square
        {"lat": 40.7505, "lon": -73.9934},  # Penn Station
        {"lat": 40.7282, "lon": -74.0776},  # World Trade Center
        {"lat": 40.7614, "lon": -73.9776},  # Central Park
    ],
)


class GeoShapeFilter(Filter):
    """
    Filter documents using complex geographic shapes.
    """

    def __init__(self, field, shape=None, relation="intersects", **kwargs):
        """
        Initialize GeoShapeFilter.

        Args:
            field (str): Geo-shape field name
            shape (dict): GeoJSON shape definition
            relation (str): Spatial relation (intersects, disjoint, within, contains)
            **kwargs: Additional geo shape parameters
        """
        pass
# Complex geo shape filtering
from pyes import GeoShapeFilter

# Circle shape
circle_filter = GeoShapeFilter(
    "region",
    shape={
        "type": "circle",
        "coordinates": [-73.9857, 40.7484],
        "radius": "1km",
    },
    relation="intersects",
)

# Polygon shape
polygon_shape_filter = GeoShapeFilter(
    "boundary",
    shape={
        "type": "polygon",
        "coordinates": [[
            [-74.0059, 40.7128],
            [-74.0059, 40.7589],
            [-73.9352, 40.7589],
            [-73.9352, 40.7128],
            [-74.0059, 40.7128],
        ]],
    },
)


class GeoIndexedShapeFilter(Filter):
    """
    Filter using pre-indexed shapes stored in ElasticSearch.
    """

    def __init__(self, field, indexed_shape_index, indexed_shape_type,
                 indexed_shape_id, indexed_shape_path=None, relation="intersects", **kwargs):
        """
        Initialize GeoIndexedShapeFilter.

        Args:
            field (str): Geo-shape field name
            indexed_shape_index (str): Index containing the shape
            indexed_shape_type (str): Type of the shape document
            indexed_shape_id (str): ID of the shape document
            indexed_shape_path (str, optional): Path to shape in document
            relation (str): Spatial relation. Default: "intersects"
            **kwargs: Additional parameters
        """
        pass
# Use pre-indexed shapes
from pyes import GeoIndexedShapeFilter

# Filter using stored city boundary
city_filter = GeoIndexedShapeFilter(
    "location",
    indexed_shape_index="shapes",
    indexed_shape_type="boundary",
    indexed_shape_id="nyc_boundary",
    relation="within",
)


class HasChildFilter(Filter):
    """
    Filter parent documents that have child documents matching criteria.
    """

    def __init__(self, type, filter=None, query=None, **kwargs):
        """
        Initialize HasChildFilter.

        Args:
            type (str): Child document type
            filter (Filter, optional): Filter for child documents
            query (Query, optional): Query for child documents
            **kwargs: Additional has_child parameters
        """
        pass
# Parent-child filtering
# RangeFilter is imported here because the second example below uses it.
from pyes import HasChildFilter, TermFilter, RangeFilter

# Blog posts with approved comments
posts_with_approved_comments = HasChildFilter(
    type="comment",
    filter=TermFilter("status", "approved"),
)

# Products with variants in stock
products_in_stock = HasChildFilter(
    type="variant",
    filter=RangeFilter("stock_quantity", gt=0),
)


class HasParentFilter(Filter):
    """
    Filter child documents that have parent documents matching criteria.
    """

    def __init__(self, parent_type, filter=None, query=None, **kwargs):
        """
        Initialize HasParentFilter.

        Args:
            parent_type (str): Parent document type
            filter (Filter, optional): Filter for parent documents
            query (Query, optional): Query for parent documents
            **kwargs: Additional has_parent parameters
        """
        pass
# Child document filtering based on parent
# TermFilter and RangeFilter are imported because the examples below use them.
from pyes import HasParentFilter, TermFilter, RangeFilter

# Comments on featured posts
comments_on_featured = HasParentFilter(
    parent_type="post",
    filter=TermFilter("featured", True),
)

# Variants of products on sale
variants_on_sale = HasParentFilter(
    parent_type="product",
    filter=RangeFilter("discount_percent", gt=0),
)


class NestedFilter(Filter):
    """
    Filter nested objects within documents.
    """

    def __init__(self, path, filter=None, query=None, **kwargs):
        """
        Initialize NestedFilter.

        Args:
            path (str): Path to nested object
            filter (Filter, optional): Filter for nested objects
            query (Query, optional): Query for nested objects
            **kwargs: Additional nested parameters
        """
        pass
# Nested object filtering
# TermFilter and RangeFilter are imported because the example below uses them.
from pyes import NestedFilter, BoolFilter, TermFilter, RangeFilter

# Products with red variants under $100
nested_filter = NestedFilter(
    path="variants",
    filter=BoolFilter(
        must=[
            TermFilter("variants.color", "red"),
            RangeFilter("variants.price", lt=100),
        ]
    ),
)


class TypeFilter(Filter):
    """
    Filter documents by their document type.
    """

    def __init__(self, type, **kwargs):
        """
        Initialize TypeFilter.

        Args:
            type (str): Document type to filter by
            **kwargs: Additional type filter parameters
        """
        pass
# Document type filtering
from pyes import TypeFilter

# Only blog post documents
posts_only = TypeFilter("blog_post")

# Only user documents
users_only = TypeFilter("user")


class IdsFilter(Filter):
    """
    Filter documents by their IDs.
    """

    def __init__(self, values, type=None, **kwargs):
        """
        Initialize IdsFilter.

        Args:
            values (list): List of document IDs
            type (str|list, optional): Document type(s) to search in
            **kwargs: Additional IDs filter parameters
        """
        pass
# ID-based filtering
from pyes import IdsFilter

# Specific document IDs
specific_docs = IdsFilter(["doc1", "doc2", "doc3"])

# IDs within specific types
typed_ids = IdsFilter(["post1", "post2"], type="blog_post")


class QueryFilter(Filter):
    """
    Wrap a query as a filter (query in filter context).

    Converts scoring queries to non-scoring filters.
    """

    def __init__(self, query, **kwargs):
        """
        Initialize QueryFilter.

        Args:
            query (Query): Query to use as filter
            **kwargs: Additional query filter parameters
        """
        pass
# Query as filter
from pyes import QueryFilter, MatchQuery

# Use text search as filter (no scoring)
text_filter = QueryFilter(MatchQuery("content", "python elasticsearch"))


class ScriptFilter(Filter):
    """
    Filter using custom scripts for complex logic.
    """

    def __init__(self, script, lang="mvel", params=None, **kwargs):
        """
        Initialize ScriptFilter.

        Args:
            script (str): Script code
            lang (str): Script language. Default: "mvel"
            params (dict, optional): Script parameters
            **kwargs: Additional script filter parameters
        """
        pass
# Script-based filtering
from pyes import ScriptFilter

# Custom calculation filter
custom_filter = ScriptFilter(
    script="doc['price'].value * doc['quantity'].value > params.min_total",
    params={"min_total": 100},
)

# Date calculation filter
date_filter = ScriptFilter(
    script="(System.currentTimeMillis() - doc['created_date'].value) < params.max_age_ms",
    params={"max_age_ms": 86400000},  # 24 hours
)


class MatchAllFilter(Filter):
    """
    Filter that matches all documents (passes everything through).
    """

    def __init__(self, **kwargs):
        """
        Initialize MatchAllFilter.

        Args:
            **kwargs: Additional parameters
        """
        pass
# Match everything filter
from pyes import MatchAllFilter

# Pass all documents (useful as base filter)
all_filter = MatchAllFilter()


class RawFilter(Filter):
    """
    Raw ElasticSearch filter JSON for custom or unsupported filters.
    """

    def __init__(self, filter_dict):
        """
        Initialize RawFilter.

        Args:
            filter_dict (dict): Raw ElasticSearch filter JSON
        """
        pass
# Raw filter for custom ElasticSearch functionality
from pyes import RawFilter

# Custom filter not supported by PyES
custom_raw_filter = RawFilter({
    "custom_filter": {
        "field": "special_field",
        "parameters": {"custom_param": "value"},
    }
})


# Build complex filters programmatically
def build_content_filter(categories=None, date_range=None, author=None,
                         location=None, min_rating=None, has_images=False):
    """
    Build complex content filtering with multiple optional criteria.

    Every argument is optional; a falsy argument contributes no filter.

    Args:
        categories: A single category value, or a list/tuple/set of values.
            A collection is matched with TermsFilter, a single value with
            TermFilter.
        date_range (dict, optional): Keyword arguments forwarded to
            RangeFilter for the "published_date" field (e.g. {"gte": ...}).
        author: Value matched exactly against "author.keyword".
        location (dict, optional): Keyword arguments forwarded to
            GeoDistanceFilter.
        min_rating: Lower bound (inclusive) for the "rating" field. Falsy
            values, including 0, add no rating filter.
        has_images (bool): When true, require the "images" field to exist.

    Returns:
        The single filter when exactly one criterion was given, a BoolFilter
        with all criteria under `must` when several were given, or a
        MatchAllFilter when none were given.
    """
    filters = []

    # Category filtering: any plain collection means "match any of these".
    # Strings are deliberately excluded so one category stays a TermFilter.
    if categories:
        if isinstance(categories, (list, tuple, set, frozenset)):
            filters.append(TermsFilter("category", list(categories)))
        else:
            filters.append(TermFilter("category", categories))

    # Date range filtering
    if date_range:
        filters.append(RangeFilter("published_date", **date_range))

    # Author filtering
    if author:
        filters.append(TermFilter("author.keyword", author))

    # Geographic filtering
    if location:
        filters.append(GeoDistanceFilter(**location))

    # Rating filtering
    if min_rating:
        filters.append(RangeFilter("rating", gte=min_rating))

    # Image requirement
    if has_images:
        filters.append(ExistsFilter("images"))

    # Combine all filters
    if not filters:
        return MatchAllFilter()
    if len(filters) == 1:
        # Avoid wrapping a lone filter in a redundant bool clause.
        return filters[0]
    return BoolFilter(must=filters)
# Usage examples
from pyes import Search, MatchAllQuery

# Restaurant search with multiple criteria
restaurant_filter = build_content_filter(
    categories=["restaurant", "cafe"],
    location={
        "location": {"lat": 40.7128, "lon": -74.0060},
        "distance": "2km",
    },
    min_rating=4.0,
    has_images=True,
)
search = Search(MatchAllQuery()).filter(restaurant_filter)
# NOTE(review): `es` is not defined in this snippet; presumably an existing
# pyes connection object -- confirm.
results = es.search(search, indices=["places"])

# Blog content filtering
blog_filter = build_content_filter(
    categories="tutorial",
    date_range={"gte": "2023-01-01"},
    author="expert_author",
)
blog_search = Search(MatchAllQuery()).filter(blog_filter)


# Optimize filter performance with proper ordering
def optimize_filter_performance(filters):
    """
    Order filters by estimated evaluation cost, cheapest first.

    Filters are grouped into three tiers by class name:

    1. Cheap: term, terms, range, exists, missing
    2. Medium: prefix, wildcard, regex
    3. Expensive: everything else (geo, script, nested, ...)

    Within a tier the input order is preserved. Selectivity is not taken
    into account; callers who know which filter is most selective should
    place it first within its tier.

    Args:
        filters: Iterable of filter objects.

    Returns:
        list: A new list with the same filters, ordered cheap -> medium ->
        expensive. The input is not modified.
    """
    cheap_types = {'TermFilter', 'TermsFilter', 'RangeFilter',
                   'ExistsFilter', 'MissingFilter'}
    medium_types = {'PrefixFilter', 'WildcardFilter', 'RegexTermFilter'}

    def cost_tier(f):
        # Classification is by class name only, so no filter classes need
        # to be imported here.
        filter_type = f.__class__.__name__
        if filter_type in cheap_types:
            return 0
        if filter_type in medium_types:
            return 1
        return 2

    # sorted() is stable, so filters keep their relative order within a tier.
    return sorted(filters, key=cost_tier)
# Example usage
filters = [
    ScriptFilter("complex_calculation()"),  # Expensive
    TermFilter("status", "published"),  # Cheap
    GeoDistanceFilter(location={"lat": 40, "lon": -74}, distance="5km"),  # Expensive
    RangeFilter("published_date", gte="2023-01-01"),  # Cheap
    ExistsFilter("content"),  # Cheap
]
optimized_filters = optimize_filter_performance(filters)
final_filter = BoolFilter(must=optimized_filters)


# Leverage ElasticSearch filter caching
# ExistsFilter and TermsFilter are imported because the helper functions
# in this section use them.
from pyes import BoolFilter, ExistsFilter, RangeFilter, TermFilter, TermsFilter

# Filters are automatically cached by ElasticSearch
# Design filters for optimal caching:

# 1. Use consistent filter values
status_filter = TermFilter("status", "published")  # Will be cached

# 2. Separate dynamic and static parts
def build_cached_filter(dynamic_date):
    """
    Build filter with cacheable and non-cacheable parts.

    The static conditions are grouped in their own BoolFilter so that the
    group is identical on every call; only the date range varies.

    Args:
        dynamic_date: Lower bound (inclusive) for the "created_date" field.

    Returns:
        BoolFilter: Requires both the static group and the date range.
    """
    # Static filters (highly cacheable)
    static_filter = BoolFilter(
        must=[
            TermFilter("status", "published"),
            TermFilter("category", "article"),
            ExistsFilter("content"),
        ]
    )

    # Dynamic filter (less cacheable)
    dynamic_filter = RangeFilter("created_date", gte=dynamic_date)

    # Combine efficiently. BoolFilter only documents must/must_not/should,
    # so the dynamic part goes into `must` rather than an unsupported
    # `filter=` keyword.
    return BoolFilter(must=[static_filter, dynamic_filter])
# 3. Use filters in consistent order
def consistent_multi_filter(tags, min_views):
"""Always construct filters in same order for cache efficiency."""
filters = []
# Always add in same order
if tags:
filters.append(TermsFilter("tags", sorted(tags))) # Sort for consistency
if min_views:
filters.append(RangeFilter("view_count", gte=min_views))
return BoolFilter(must=filters)

The PyES Filter DSL provides comprehensive, high-performance filtering capabilities with full support for logical combinations, geospatial operations, document relationships, and complex filtering scenarios while maintaining ElasticSearch's filter caching benefits.
Install with Tessl CLI
npx tessl i tessl/pypi-pyes