tessl/pypi-elasticsearch5

Python client for Elasticsearch 5.x providing comprehensive access to all Elasticsearch APIs and features.

—

Pending

Overview

Eval results

Files

Search Operations

Name: tessl/pypi-elasticsearch5
Author: tessl

Comprehensive search functionality for querying documents in Elasticsearch. Provides full-text search capabilities, query execution, aggregations, scroll operations, and advanced search features.

Capabilities

Basic Search

Execute search queries with support for complex queries, filtering, sorting, and result pagination.

def search(index: str = None, doc_type: str = None, body: dict = None, **params) -> dict:
    """
    Execute a search query against one or more indices.
    
    Parameters:
    - index: Index name(s) to search (string or list)
    - doc_type: Document type(s) to search
    - body: Search query as Elasticsearch Query DSL
    - _source: Fields to include/exclude in results
    - _source_exclude: Fields to exclude from _source
    - _source_include: Fields to include in _source
    - allow_no_indices: Whether to ignore if indices don't exist
    - analyzer: Analyzer for query string
    - analyze_wildcard: Analyze wildcard and prefix queries
    - batched_reduce_size: Number of shard results to reduce at once
    - default_operator: Default operator for query string ('AND' or 'OR')
    - df: Default field for query string
    - expand_wildcards: Expand wildcard expressions ('open', 'closed', 'none', 'all')
    - from_: Starting document offset (default 0)
    - ignore_unavailable: Ignore unavailable indices
    - lenient: Ignore format-based query failures
    - preference: Node preference for execution
    - q: Lucene query string
    - routing: Routing values
    - scroll: Scroll timeout for pagination
    - search_type: Search type ('query_then_fetch', 'dfs_query_then_fetch')
    - size: Maximum number of documents to return (default 10)
    - sort: Sort specification
    - terminate_after: Terminate after N documents
    - timeout: Search timeout
    - track_scores: Track scores when sorting
    - version: Include document versions in results
    
    Returns:
    dict: Search results with 'hits', 'aggregations', and metadata
    """

Document Counting

Count documents matching a query efficiently without retrieving full results.

def count(index: str = None, doc_type: str = None, body: dict = None, **params) -> dict:
    """
    Count documents matching a query.
    
    Parameters:
    - index: Index name(s) to search
    - doc_type: Document type(s)
    - body: Count query (optional, counts all if omitted)
    - allow_no_indices: Handle missing indices
    - analyzer: Query analyzer
    - analyze_wildcard: Analyze wildcards
    - default_operator: Default query operator
    - df: Default field
    - expand_wildcards: Wildcard expansion
    - ignore_unavailable: Ignore unavailable indices
    - lenient: Ignore query failures
    - min_score: Minimum score threshold
    - preference: Node preference
    - q: Query string
    - routing: Routing values
    - terminate_after: Early termination
    
    Returns:
    dict: Count result with 'count' and '_shards' information
    """

Scroll Search

Efficiently iterate through large result sets using scroll context.

def scroll(scroll_id: str = None, body: dict = None, **params) -> dict:
    """
    Continue a scroll search to retrieve next batch of results.
    
    Parameters:
    - scroll_id: Scroll context identifier from previous search/scroll
    - body: Request body with scroll_id (alternative to parameter)
    - scroll: Time to keep scroll context alive (e.g., '5m')
    - rest_total_hits_as_int: Not applicable to Elasticsearch 5.x ('hits.total' is already an integer); do not pass it to the 5.x client
    
    Returns:
    dict: Next batch of search results with new scroll_id
    """

def clear_scroll(scroll_id: str = None, body: dict = None, **params) -> dict:
    """
    Clear scroll context to free resources.
    
    Parameters:
    - scroll_id: Scroll context identifier(s) to clear
    - body: Request body with scroll_id list
    
    Returns:
    dict: Success confirmation
    """

Multi-Search

Execute multiple search queries in a single request for improved performance.

def msearch(body: list, index: str = None, doc_type: str = None, **params) -> dict:
    """
    Execute multiple search queries in a single request.
    
    Parameters:
    - body: List of search requests (header/body pairs)
    - index: Default index for requests without explicit index
    - doc_type: Default document type
    - max_concurrent_searches: Maximum concurrent searches
    - rest_total_hits_as_int: Not applicable to Elasticsearch 5.x ('hits.total' is already an integer); do not pass it to the 5.x client
    - typed_keys: Add type prefix to aggregation names
    
    Body format:
    [
        {"index": "my_index", "type": "my_type"},  # Header (type is optional)
        {"query": {"match_all": {}}},           # Body
        {"index": "other_index"},               # Header
        {"query": {"term": {"status": "published"}}}  # Body
    ]
    
    Returns:
    dict: Array of search responses corresponding to each request
    """

def msearch_template(body: list, index: str = None, doc_type: str = None, **params) -> dict:
    """
    Execute multiple search template queries.
    
    Parameters: Same as msearch
    Body: Search template specifications instead of direct queries
    
    Returns:
    dict: Array of search template responses
    """

Search Templates

Use predefined search templates for reusable queries with parameters.

def search_template(index: str = None, doc_type: str = None, body: dict = None, **params) -> dict:
    """
    Execute a search using a search template.
    
    Parameters:
    - index: Index name(s) to search
    - doc_type: Document type(s)
    - body: Template specification with id/source and params
    - allow_no_indices: Handle missing indices
    - expand_wildcards: Wildcard expansion
    - ignore_unavailable: Ignore unavailable indices
    - preference: Node preference
    - routing: Routing values
    - scroll: Scroll timeout
    - search_type: Search type
    
    Body structure:
    {
        "id": "my_template",               # Template ID
        "params": {                        # Template parameters
            "query_string": "search term",
            "from": 0,
            "size": 10
        }
    }
    
    Or with inline template (on Elasticsearch releases before 5.6 the key is "inline" rather than "source"):
    {
        "source": {                        # Inline template
            "query": {
                "match": {
                    "title": "{{query_string}}"
                }
            },
            "from": "{{from}}",
            "size": "{{size}}"
        },
        "params": {"query_string": "test", "from": 0, "size": 20}
    }
    
    Returns:
    dict: Search results from template execution
    """

def render_search_template(id: str = None, body: dict = None, **params) -> dict:
    """
    Render a search template to see the generated query.
    
    Parameters:
    - id: Template ID to render
    - body: Template specification (if not using stored template)
    
    Returns:
    dict: Rendered template showing the final query
    """

Query Explanation

Understand how documents are scored and why they match queries.

def explain(index: str, doc_type: str, id: str, body: dict = None, **params) -> dict:
    """
    Explain why a document matches or doesn't match a query.
    
    Parameters:
    - index: Index name
    - doc_type: Document type
    - id: Document identifier
    - body: Query to explain
    - _source: Include document source in response
    - _source_exclude: Source fields to exclude
    - _source_include: Source fields to include
    - analyzer: Query analyzer
    - analyze_wildcard: Analyze wildcards
    - default_operator: Default query operator
    - df: Default field
    - lenient: Ignore query failures
    - preference: Node preference
    - q: Query string
    - routing: Routing value
    
    Returns:
    dict: Explanation of scoring and matching details
    """

Search Suggestions

Get search suggestions and completions based on indexed data.

def suggest(body: dict, index: str = None, **params) -> dict:
    """
    Get search suggestions using suggestion APIs.
    
    Parameters:
    - body: Suggestion request specification
    - index: Index name(s) to search for suggestions
    - allow_no_indices: Handle missing indices
    - expand_wildcards: Wildcard expansion
    - ignore_unavailable: Ignore unavailable indices
    - preference: Node preference
    - routing: Routing values
    
    Body structure:
    {
        "my_suggestion": {
            "text": "search text",
            "term": {                      # Term suggester
                "field": "title"
            }
        },
        "my_phrase_suggestion": {
            "text": "search phrase",
            "phrase": {                    # Phrase suggester
                "field": "title",
                "size": 3
            }
        },
        "my_completion": {
            "prefix": "sea",
            "completion": {                # Completion suggester
                "field": "suggest",
                "size": 5
            }
        }
    }
    
    Returns:
    dict: Suggestions organized by suggester name
    """

Advanced Search Features

Additional search capabilities for specialized use cases.

def field_caps(index: str = None, body: dict = None, **params) -> dict:
    """
    Get field capabilities across indices.
    
    Parameters:
    - index: Index name(s) to analyze
    - body: Field names specification
    - fields: Field names to analyze
    - allow_no_indices: Handle missing indices
    - expand_wildcards: Wildcard expansion
    - ignore_unavailable: Ignore unavailable indices
    
    Returns:
    dict: Field capabilities and types across indices
    """

def search_shards(index: str = None, doc_type: str = None, **params) -> dict:
    """
    Get information about shards that a search request would be executed against.
    
    Parameters:
    - index: Index name(s)
    - doc_type: Document type(s)
    - allow_no_indices: Handle missing indices
    - expand_wildcards: Wildcard expansion
    - ignore_unavailable: Ignore unavailable indices
    - local: Execute locally on current node
    - preference: Node preference
    - routing: Routing values
    
    Returns:
    dict: Shard information for the search request
    """

Usage Examples

Basic Search Queries

from elasticsearch5 import Elasticsearch

es = Elasticsearch(['localhost:9200'])

# Simple match query: full-text match on the title, first page of 20 hits
title_match = {'match': {'title': 'elasticsearch python'}}
search_body = {'query': title_match, 'size': 20, 'from': 0}
results = es.search(index='articles', body=search_body)
print(f"Found {results['hits']['total']} documents")

# Boolean query with filters, assembled from its individual clauses
must_clauses = [
    {'match': {'title': 'python'}},
    {'range': {'created_at': {'gte': '2023-01-01'}}},
]
filter_clauses = [{'term': {'status': 'published'}}]
must_not_clauses = [{'term': {'category': 'draft'}}]

# Newest first, ties broken by relevance score
sort_spec = [{'created_at': {'order': 'desc'}}, '_score']

complex_query = {
    'query': {
        'bool': {
            'must': must_clauses,
            'filter': filter_clauses,
            'must_not': must_not_clauses,
        },
    },
    'sort': sort_spec,
    # Restrict the returned _source to the fields the caller needs
    '_source': ['title', 'author', 'created_at'],
}
results = es.search(index='articles', body=complex_query)

Aggregations

# Search with aggregations
agg_query = {
    'query': {'match_all': {}},
    'aggs': {
        # Top 10 authors by document count
        'authors': {
            'terms': {
                'field': 'author.keyword',
                'size': 10
            }
        },
        # Documents bucketed per month.
        # Elasticsearch 5.x uses 'interval' here; 'calendar_interval' only
        # exists in later major versions and is rejected by a 5.x cluster.
        'publication_dates': {
            'date_histogram': {
                'field': 'created_at',
                'interval': 'month'
            }
        },
        # Mean of the numeric 'score' field across all matching documents
        'avg_score': {
            'avg': {
                'field': 'score'
            }
        }
    },
    'size': 0  # Only return aggregations, no documents
}
results = es.search(index='articles', body=agg_query)
print("Top authors:", results['aggregations']['authors']['buckets'])

Scroll Search for Large Results

# Initial scroll search
scroll_query = {
    'query': {'match_all': {}},
    'size': 1000  # Documents per batch
}
response = es.search(
    index='large_index',
    body=scroll_query,
    scroll='5m'  # Keep scroll context for 5 minutes
)
scroll_id = response['_scroll_id']

# Collect into a fresh list so the first response's hit list is not mutated
all_docs = []

try:
    # An empty batch signals that the scroll is exhausted
    while response['hits']['hits']:
        all_docs.extend(response['hits']['hits'])
        response = es.scroll(scroll_id=scroll_id, scroll='5m')
        # Always continue with the scroll id from the latest response
        scroll_id = response['_scroll_id']
finally:
    # Clear scroll context even when a scroll request fails, so the
    # cluster does not hold the context open until the timeout expires
    es.clear_scroll(scroll_id=scroll_id)

print(f"Retrieved {len(all_docs)} documents total")

Multi-Search

# Execute multiple searches in one request.
# The body alternates header and query dicts; each header selects the target
# index for the query that follows it. No document type is given: '_doc' is
# not a usable type name on Elasticsearch 5.x, and the type is optional.
msearch_body = [
    {'index': 'articles'},
    {'query': {'match': {'category': 'tech'}}},

    {'index': 'users'},
    {'query': {'range': {'age': {'gte': 18}}}},

    # Count-only request: no hits returned, just the aggregation.
    # On 5.x the '_id' field is not available to aggregations; '_uid' is.
    {'index': 'articles'},
    {'query': {'match_all': {}}, 'size': 0, 'aggs': {'total': {'value_count': {'field': '_uid'}}}}
]

responses = es.msearch(body=msearch_body)
# Responses come back in request order; a failed query carries an 'error'
# key instead of 'hits', so check for it before reading the hit count.
for i, response in enumerate(responses['responses']):
    if 'error' in response:
        print(f"Query {i} failed: {response['error']}")
    else:
        print(f"Query {i} returned {response['hits']['total']} results")

Search Templates

# First, store a search template.
# Every '{{name}}' placeholder is filled from the 'params' supplied at search time.
match_clause = {'match': {'{{field}}': '{{query}}'}}
date_filter = {'range': {'{{date_field}}': {'gte': '{{start_date}}'}}}

template_body = {
    'template': {
        'query': {
            'bool': {
                'must': [match_clause],
                'filter': [date_filter],
            },
        },
        'size': '{{size}}',
        'from': '{{from}}',
    },
}
es.put_template(id='article_search', body=template_body)

# Use the template: reference it by id and provide a value for each placeholder
template_params = {
    'field': 'title',
    'query': 'elasticsearch',
    'date_field': 'created_at',
    'start_date': '2023-01-01',
    'size': 10,
    'from': 0,
}
template_search = {'id': 'article_search', 'params': template_params}
results = es.search_template(index='articles', body=template_search)

Install with Tessl CLI