tessl/pypi-azure-search-documents

Microsoft Azure AI Search Client Library for Python providing comprehensive search, indexing, and AI-powered document processing capabilities.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Document Search and Querying

Name: tessl/pypi-azure-search-documents
Author: tessl

The SearchClient provides comprehensive functionality for searching documents, managing document lifecycle, and executing queries against Azure AI Search indexes. It supports text search, vector search, hybrid queries, suggestions, autocomplete, and sophisticated filtering and ranking capabilities.

Capabilities

Client Initialization

Create a SearchClient instance to connect to a specific search index.

class SearchClient:
    """Client for a single search index; supports use as a context manager."""

    def __init__(
        self, 
        endpoint: str, 
        index_name: str, 
        credential: Union[AzureKeyCredential, TokenCredential], 
        **kwargs
    ) -> None:
        """
        Initialize SearchClient for a specific index.

        Parameters:
        - endpoint (str): The URL endpoint of an Azure search service
        - index_name (str): The name of the index to connect to
        - credential (AzureKeyCredential or TokenCredential): A credential to
          authorize search client requests

        Optional keyword arguments (not named in the signature; supplied
        through **kwargs):
        - api_version (str): The Search API version to use
        - audience (str): AAD audience for authentication
        """
    
    def close(self) -> None:
        """Close the session."""
    
    # Context-manager protocol; __enter__ is annotated to return a SearchClient.
    def __enter__(self) -> "SearchClient": ...
    def __exit__(self, *args) -> None: ...

Document Search

Execute search queries with various modes and options.

def search(
    self,
    search_text: Optional[str] = None,
    *,
    include_total_count: Optional[bool] = None,
    facets: Optional[List[str]] = None,
    filter: Optional[str] = None,
    highlight_fields: Optional[str] = None,
    highlight_post_tag: Optional[str] = None,
    highlight_pre_tag: Optional[str] = None,
    minimum_coverage: Optional[float] = None,
    order_by: Optional[List[str]] = None,
    query_type: Optional[Union[str, QueryType]] = None,
    scoring_parameters: Optional[List[str]] = None,
    scoring_profile: Optional[str] = None,
    search_fields: Optional[List[str]] = None,
    search_mode: Optional[Union[str, SearchMode]] = None,
    select: Optional[List[str]] = None,
    skip: Optional[int] = None,
    top: Optional[int] = None,
    vector_queries: Optional[List[VectorQuery]] = None,
    semantic_configuration_name: Optional[str] = None,
    query_answer: Optional[Union[str, QueryAnswerType]] = None,
    query_caption: Optional[Union[str, QueryCaptionType]] = None,
    **kwargs
) -> SearchItemPaged:
    """
    Execute a search query against the index.

    Every parameter is optional and defaults to None. All parameters after
    search_text are keyword-only.

    Parameters:
    - search_text (str): Text to search for
    - include_total_count (bool): Include total count of matches
    - facets (List[str]): Facet expressions for navigation
    - filter (str): OData filter expression
    - highlight_fields (str): Fields to highlight in results (typed as a
      single string, not a list)
    - highlight_post_tag (str): Tag after highlighted text
    - highlight_pre_tag (str): Tag before highlighted text
    - minimum_coverage (float): Minimum coverage percentage
    - order_by (List[str]): Sort expressions
    - query_type (str or QueryType): Type of query (simple, full, semantic)
    - scoring_parameters (List[str]): Scoring parameter values
    - scoring_profile (str): Scoring profile name
    - search_fields (List[str]): Fields to search in
    - search_mode (str or SearchMode): Search mode (any, all)
    - select (List[str]): Fields to include in results
    - skip (int): Number of results to skip
    - top (int): Number of results to return
    - vector_queries (List[VectorQuery]): Vector queries for similarity search
    - semantic_configuration_name (str): Semantic search configuration
    - query_answer (str or QueryAnswerType): Answer extraction type
    - query_caption (str or QueryCaptionType): Caption extraction type

    Returns:
    SearchItemPaged: Iterator over search results
    """

Suggestions and Autocomplete

Get search suggestions and autocomplete results based on partial input.

def suggest(
    self,
    search_text: str,
    suggester_name: str,
    *,
    filter: Optional[str] = None,
    use_fuzzy_matching: Optional[bool] = None,
    highlight_post_tag: Optional[str] = None,
    highlight_pre_tag: Optional[str] = None,
    minimum_coverage: Optional[float] = None,
    order_by: Optional[List[str]] = None,
    search_fields: Optional[List[str]] = None,
    select: Optional[List[str]] = None,
    top: Optional[int] = None,
    **kwargs
) -> List[Dict]:
    """
    Get search suggestions based on partial search text.

    search_text and suggester_name are required. The remaining parameters
    are keyword-only and default to None.

    Parameters:
    - search_text (str): Partial search text
    - suggester_name (str): Name of the suggester to use
    - filter (str): OData filter expression
    - use_fuzzy_matching (bool): Enable fuzzy matching
    - highlight_post_tag (str): Tag after highlighted text
    - highlight_pre_tag (str): Tag before highlighted text
    - minimum_coverage (float): Minimum coverage percentage
    - order_by (List[str]): Sort expressions
    - search_fields (List[str]): Fields to search in
    - select (List[str]): Fields to include in results
    - top (int): Number of suggestions to return

    Returns:
    List[Dict]: List of suggestion results
    """

def autocomplete(
    self,
    search_text: str,
    suggester_name: str,
    *,
    autocomplete_mode: Optional[Union[str, AutocompleteMode]] = None,
    filter: Optional[str] = None,
    use_fuzzy_matching: Optional[bool] = None,
    highlight_post_tag: Optional[str] = None,
    highlight_pre_tag: Optional[str] = None,
    minimum_coverage: Optional[float] = None,
    search_fields: Optional[List[str]] = None,
    top: Optional[int] = None,
    **kwargs
) -> List[Dict]:
    """
    Get autocomplete suggestions based on partial search text.

    search_text and suggester_name are required. The remaining parameters
    are keyword-only and default to None.

    Parameters:
    - search_text (str): Partial search text
    - suggester_name (str): Name of the suggester to use
    - autocomplete_mode (str or AutocompleteMode): Autocomplete behavior
    - filter (str): OData filter expression
    - use_fuzzy_matching (bool): Enable fuzzy matching
    - highlight_post_tag (str): Tag after highlighted text
    - highlight_pre_tag (str): Tag before highlighted text
    - minimum_coverage (float): Minimum coverage percentage
    - search_fields (List[str]): Fields to search in
    - top (int): Number of completions to return

    Returns:
    List[Dict]: List of autocomplete results
    """

Document Retrieval

Get individual documents and document counts.

def get_document(
    self, 
    key: str, 
    selected_fields: Optional[List[str]] = None, 
    **kwargs
) -> Dict:
    """
    Retrieve a document by its key value.

    Parameters:
    - key (str): The key value of the document to retrieve
    - selected_fields (List[str], optional): Fields to include in result.
      Defaults to None.

    Returns:
    Dict: The retrieved document
    """

def get_document_count(self, **kwargs) -> int:
    """
    Get the count of documents in the index.

    Takes no named parameters beyond **kwargs.

    Returns:
    int: Number of documents in the index
    """

Document Upload and Indexing

Add, update, merge, and delete documents in the search index.

def upload_documents(self, documents: List[Dict], **kwargs) -> List[IndexingResult]:
    """
    Upload documents to the index.

    Creates new documents or replaces existing ones.

    Parameters:
    - documents (List[Dict]): Documents to upload

    Returns:
    List[IndexingResult]: Results of the indexing operations
    """

def merge_documents(self, documents: List[Dict], **kwargs) -> List[IndexingResult]:
    """
    Merge documents into the index.

    Updates existing documents with the provided fields.

    Parameters:
    - documents (List[Dict]): Documents to merge

    Returns:
    List[IndexingResult]: Results of the indexing operations
    """

def merge_or_upload_documents(self, documents: List[Dict], **kwargs) -> List[IndexingResult]:
    """
    Merge each document if it already exists in the index; upload it otherwise.

    Parameters:
    - documents (List[Dict]): Documents to merge or upload

    Returns:
    List[IndexingResult]: Results of the indexing operations
    """

def delete_documents(self, documents: List[Dict], **kwargs) -> List[IndexingResult]:
    """
    Delete documents from the index.

    Parameters:
    - documents (List[Dict]): Documents to delete. Each dict must include
      the index's key field.

    Returns:
    List[IndexingResult]: Results of the deletion operations
    """

def index_documents(self, batch: IndexDocumentsBatch, **kwargs) -> List[IndexingResult]:
    """
    Execute a batch of document operations.

    Parameters:
    - batch (IndexDocumentsBatch): Batch of document operations (upload,
      delete, merge, merge-or-upload actions)

    Returns:
    List[IndexingResult]: Results of the batch operations
    """

Batch Document Operations

Create and manage batches of document operations for efficient processing.

class IndexDocumentsBatch:
    """
    Batch container for document operations.

    Actions are queued with the add_*_actions methods; len(batch) reports how
    many actions are queued.
    """
    
    def __init__(self) -> None:
        """Initialize an empty batch."""
    
    def add_upload_actions(self, documents: List[Dict]) -> None:
        """Add one upload action per entry in documents to the batch."""
    
    def add_delete_actions(self, documents: List[Dict]) -> None:
        """Add delete actions for the given documents to the batch."""
    
    def add_merge_actions(self, documents: List[Dict]) -> None:
        """Add merge actions for the given documents to the batch."""
    
    def add_merge_or_upload_actions(self, documents: List[Dict]) -> None:
        """Add merge-or-upload actions for the given documents to the batch."""
    
    def __len__(self) -> int:
        """Get the number of actions in the batch."""

High-Throughput Document Indexing

Buffered sender for automatic batching and retry handling in high-volume scenarios.

class SearchIndexingBufferedSender:
    """
    High-throughput document indexing with automatic batching.

    Documents are queued by the upload/delete/merge methods (all return None)
    and sent in batches. The class defines __enter__/__exit__, so it can be
    used in a `with` statement.
    """

    def __init__(
        self,
        endpoint: str,
        index_name: str,
        credential: Union[AzureKeyCredential, TokenCredential],
        *,
        auto_flush_interval: int = 60,
        initial_batch_action_count: int = 512,
        max_retries_per_action: int = 3,
        max_retries: int = 3,
        **kwargs
    ) -> None:
        """
        Initialize buffered sender for high-throughput indexing.

        Parameters:
        - endpoint (str): Search service endpoint
        - index_name (str): Target index name
        - credential (AzureKeyCredential or TokenCredential): Authentication
          credential

        Keyword-only parameters:
        - auto_flush_interval (int): Auto-flush interval in seconds (default 60)
        - initial_batch_action_count (int): Initial batch size (default 512)
        - max_retries_per_action (int): Max retries per document (default 3)
        - max_retries (int): Max retries per batch (default 3).
          NOTE(review): this keyword is not listed in the published
          SearchIndexingBufferedSender constructor reference - confirm it is
          honored before relying on it.
        """

    def upload_documents(self, documents: List[Dict], **kwargs) -> None:
        """Queue documents for upload."""

    def delete_documents(self, documents: List[Dict], **kwargs) -> None:
        """Queue documents for deletion."""

    def merge_documents(self, documents: List[Dict], **kwargs) -> None:
        """Queue documents for merge."""

    def merge_or_upload_documents(self, documents: List[Dict], **kwargs) -> None:
        """Queue documents for merge or upload."""

    def flush(self, timeout: int = 86400, **kwargs) -> bool:
        """
        Flush all pending operations.

        Parameters:
        - timeout (int): Timeout in seconds. Defaults to 86400 (one day).

        Returns:
        bool: True if any error occurred while flushing, False if every
        pending action was sent without error. Note the polarity: a True
        result signals failure, not success.

        Raises:
        ServiceResponseTimeoutError: If the timeout elapses before the
        pending actions have all been processed.
        """

    def close(self, **kwargs) -> None:
        """Close the sender and flush remaining operations."""

    # Context-manager protocol; __enter__ is annotated to return the sender type.
    def __enter__(self) -> "SearchIndexingBufferedSender": ...
    def __exit__(self, *args) -> None: ...

Request Customization

Send custom HTTP requests to the search service.

def send_request(
    self, 
    request: HttpRequest, 
    *, 
    stream: bool = False, 
    **kwargs
) -> HttpResponse:
    """
    Send a custom HTTP request to the search service.

    Parameters:
    - request (HttpRequest): The HTTP request to send
    - stream (bool): Whether to stream the response. Keyword-only;
      defaults to False.

    Returns:
    HttpResponse: The HTTP response
    """

Usage Examples

Basic Search

from azure.search.documents import SearchClient
from azure.core.credentials import AzureKeyCredential

# The endpoint, index name and key below are placeholders; substitute the
# values for your own search service.
client = SearchClient(
    endpoint="https://service.search.windows.net",
    index_name="hotels",
    credential=AzureKeyCredential("admin-key")
)

# Simple text search: at most 5 results (top=5)
results = client.search("luxury hotel", top=5)
for result in results:
    # Each result is indexed like a dict; this example reads the relevance
    # score from the '@search.score' key. 'name' is a field of the example index.
    print(f"{result['name']}: {result['@search.score']}")

# Filtered search with facets, sorted by rating (descending) and then name
results = client.search(
    search_text="beach resort",
    filter="rating ge 4",
    facets=["category", "city"],
    order_by=["rating desc", "name"]
)

Vector Search

from azure.search.documents.models import VectorizedQuery

# Vector search with pre-computed embedding 
# NOTE: the trailing `...` is a literal Ellipsis placeholder standing in for
# the remaining dimensions. Replace the whole list with a real embedding
# before running this example.
embedding = [0.1, 0.2, 0.3, ...]  # Your computed vector
vector_query = VectorizedQuery(vector=embedding, k_nearest_neighbors=5, fields="content_vector")

# `client` is the SearchClient created in the Basic Search example.
# search_text=None requests a vector-only query (no text component).
results = client.search(
    search_text=None,
    vector_queries=[vector_query],
    select=["id", "title", "content"]
)

Batch Document Upload

from azure.search.documents import IndexDocumentsBatch

# Create batch: two uploads and one delete are queued, nothing is sent yet
batch = IndexDocumentsBatch()
batch.add_upload_actions([
    {"id": "1", "title": "Document 1", "content": "Content 1"},
    {"id": "2", "title": "Document 2", "content": "Content 2"}
])
batch.add_delete_actions([{"id": "old-doc"}])

# Execute batch (`client` is the SearchClient created in the Basic Search example)
results = client.index_documents(batch)
for result in results:
    # The per-document success flag on the SDK's IndexingResult is
    # `succeeded`; `status` is not an attribute of the Python model.
    print(f"Document {result.key}: {'succeeded' if result.succeeded else 'failed'}")

High-Volume Indexing

from azure.search.documents import SearchIndexingBufferedSender

# endpoint, index_name, credential, large_document_list and updates are
# placeholders that the caller must define before running this example.
with SearchIndexingBufferedSender(endpoint, index_name, credential) as sender:
    # Documents are automatically batched and sent
    sender.upload_documents(large_document_list)
    sender.merge_documents(updates)
    # Automatic flush on context exit

Common Types

# Search results iterator
class SearchItemPaged:
    """Iterator over search results, as returned by search()."""
    # Iterating yields one result at a time as a dict.
    def __iter__(self) -> Iterator[Dict[str, Any]]: ...
    # Iterates page by page; each page is a list of result dicts.
    def by_page(self) -> Iterator[List[Dict[str, Any]]]: ...
    # The three accessors below are annotated Optional, so each may return None.
    def get_count(self) -> Optional[int]: ...
    def get_coverage(self) -> Optional[float]: ...
    def get_facets(self) -> Optional[Dict[str, List[Dict[str, Any]]]]: ...

# Indexing operation result
class IndexingResult:
    """Result of one indexing action for a single document."""
    # Key of the document the action applied to.
    key: str
    # Whether the action succeeded for this document. The Python model names
    # this attribute `succeeded`; `status` is only the REST wire name.
    succeeded: bool
    # Error description for the action; may be None.
    error_message: Optional[str]
    # Status code of the individual action.
    status_code: int

# Exception for oversized requests  
class RequestEntityTooLargeError(Exception):
    """
    Raised when the request payload is too large.

    NOTE(review): the published SDK appears to derive this error from
    azure.core's HttpResponseError (HTTP 413) rather than directly from
    Exception - confirm the base class before writing narrow except clauses.
    """

Install with Tessl CLI

npx tessl i tessl/pypi-azure-search-documents

docs

async-clients.md

index-management.md

index.md

indexer-management.md

models.md

search-client.md

tile.json

tessl/pypi-azure-search-documents

docs/search-client.md

Document Search and Querying

Capabilities

Client Initialization

Document Search

Suggestions and Autocomplete

Document Retrieval

Document Upload and Indexing

Batch Document Operations

High-Throughput Document Indexing

Request Customization

Usage Examples

Basic Search

Vector Search

Batch Document Upload

High-Volume Indexing

Common Types

docs/search-client.md