Microsoft Azure AI Search Client Library for Python, providing comprehensive search, indexing, and AI-powered document processing capabilities.

Quality: Pending (does it follow best practices?)
Impact: Pending (no eval scenarios have been run)

The SearchClient provides comprehensive functionality for searching documents, managing document lifecycle, and executing queries against Azure AI Search indexes. It supports text search, vector search, hybrid queries, suggestions, autocomplete, and sophisticated filtering and ranking capabilities.
Create a SearchClient instance to connect to a specific search index.
class SearchClient:
"""Client for querying a single Azure AI Search index and managing its documents."""
def __init__(
self,
endpoint: str,
index_name: str,
credential: Union[AzureKeyCredential, TokenCredential],
**kwargs
) -> None:
"""
Initialize SearchClient for a specific index.

Parameters:
- endpoint (str): The URL endpoint of an Azure search service
- index_name (str): The name of the index to connect to
- credential: A credential to authorize search client requests
- api_version (str, optional, passed via **kwargs): The Search API version to use
- audience (str, optional, passed via **kwargs): AAD audience for authentication
"""
def close(self) -> None:
"""Close the client's underlying session."""
def __enter__(self) -> "SearchClient": ...
def __exit__(self, *args) -> None: ...
# Execute search queries with various modes and options.
def search(
    self,
    search_text: Optional[str] = None,
    *,
    include_total_count: Optional[bool] = None,
    facets: Optional[List[str]] = None,
    filter: Optional[str] = None,
    highlight_fields: Optional[str] = None,
    highlight_post_tag: Optional[str] = None,
    highlight_pre_tag: Optional[str] = None,
    minimum_coverage: Optional[float] = None,
    order_by: Optional[List[str]] = None,
    query_type: Optional[Union[str, QueryType]] = None,
    scoring_parameters: Optional[List[str]] = None,
    scoring_profile: Optional[str] = None,
    search_fields: Optional[List[str]] = None,
    search_mode: Optional[Union[str, SearchMode]] = None,
    select: Optional[List[str]] = None,
    skip: Optional[int] = None,
    top: Optional[int] = None,
    vector_queries: Optional[List[VectorQuery]] = None,
    semantic_configuration_name: Optional[str] = None,
    query_answer: Optional[Union[str, QueryAnswerType]] = None,
    query_caption: Optional[Union[str, QueryCaptionType]] = None,
    **kwargs
) -> SearchItemPaged:
    """
    Execute a search query against the index.

    Parameters (all keyword-only except search_text):
    - search_text (str, optional): Text to search for
    - include_total_count (bool): Include total count of matches
    - facets (List[str]): Facet expressions for navigation
    - filter (str): OData filter expression
    - highlight_fields (str): Fields to highlight in results
    - highlight_post_tag (str): Tag after highlighted text
    - highlight_pre_tag (str): Tag before highlighted text
    - minimum_coverage (float): Minimum coverage percentage
    - order_by (List[str]): Sort expressions
    - query_type (QueryType): Type of query (simple, full, semantic)
    - scoring_parameters (List[str]): Scoring parameter values
    - scoring_profile (str): Scoring profile name
    - search_fields (List[str]): Fields to search in
    - search_mode (SearchMode): Search mode (any, all)
    - select (List[str]): Fields to include in results
    - skip (int): Number of results to skip
    - top (int): Number of results to return
    - vector_queries (List[VectorQuery]): Vector queries for similarity search
    - semantic_configuration_name (str): Semantic search configuration
    - query_answer (QueryAnswerType): Answer extraction type
    - query_caption (QueryCaptionType): Caption extraction type

    Returns:
    SearchItemPaged: Iterator over search results
    """
# Get search suggestions and autocomplete results based on partial input.
def suggest(
self,
search_text: str,
suggester_name: str,
*,
filter: Optional[str] = None,
use_fuzzy_matching: Optional[bool] = None,
highlight_post_tag: Optional[str] = None,
highlight_pre_tag: Optional[str] = None,
minimum_coverage: Optional[float] = None,
order_by: Optional[List[str]] = None,
search_fields: Optional[List[str]] = None,
select: Optional[List[str]] = None,
top: Optional[int] = None,
**kwargs
) -> List[Dict]:
"""
Get search suggestions based on partial search text.

Parameters (all keyword-only except search_text and suggester_name):
- search_text (str): Partial search text
- suggester_name (str): Name of the suggester to use
- filter (str): OData filter expression
- use_fuzzy_matching (bool): Enable fuzzy matching
- highlight_post_tag (str): Tag after highlighted text
- highlight_pre_tag (str): Tag before highlighted text
- minimum_coverage (float): Minimum coverage percentage
- order_by (List[str]): Sort expressions
- search_fields (List[str]): Fields to search in
- select (List[str]): Fields to include in results
- top (int): Number of suggestions to return

Returns:
List[Dict]: List of suggestion results
"""
def autocomplete(
    self,
    search_text: str,
    suggester_name: str,
    *,
    autocomplete_mode: Optional[Union[str, AutocompleteMode]] = None,
    filter: Optional[str] = None,
    use_fuzzy_matching: Optional[bool] = None,
    highlight_post_tag: Optional[str] = None,
    highlight_pre_tag: Optional[str] = None,
    minimum_coverage: Optional[float] = None,
    search_fields: Optional[List[str]] = None,
    top: Optional[int] = None,
    **kwargs
) -> List[Dict]:
    """
    Get autocomplete suggestions based on partial search text.

    Parameters (all keyword-only except search_text and suggester_name):
    - search_text (str): Partial search text
    - suggester_name (str): Name of the suggester to use
    - autocomplete_mode (AutocompleteMode): Autocomplete behavior
    - filter (str): OData filter expression
    - use_fuzzy_matching (bool): Enable fuzzy matching
    - highlight_post_tag (str): Tag after highlighted text
    - highlight_pre_tag (str): Tag before highlighted text
    - minimum_coverage (float): Minimum coverage percentage
    - search_fields (List[str]): Fields to search in
    - top (int): Number of completions to return

    Returns:
    List[Dict]: List of autocomplete results
    """
# Get individual documents and document counts.
def get_document(
self,
key: str,
selected_fields: Optional[List[str]] = None,
**kwargs
) -> Dict:
"""
Retrieve a single document from the index by its key value.

Parameters:
- key (str): The key value of the document to retrieve
- selected_fields (List[str], optional): Fields to include in the result

Returns:
Dict: The retrieved document
"""
def get_document_count(self, **kwargs) -> int:
    """
    Get the count of documents in the index.

    Returns:
    int: Number of documents in the index
    """
# Add, update, merge, and delete documents in the search index.
def upload_documents(self, documents: List[Dict], **kwargs) -> List[IndexingResult]:
"""
Upload documents to the index. Creates new documents or replaces existing ones.

Parameters:
- documents (List[Dict]): Documents to upload
  (presumably each must include the index key field -- verify against index schema)

Returns:
List[IndexingResult]: Results of the indexing operations
"""
def merge_documents(self, documents: List[Dict], **kwargs) -> List[IndexingResult]:
"""
Merge documents into the index. Updates existing documents with provided fields.

Parameters:
- documents (List[Dict]): Documents to merge
  (NOTE(review): fields not present are presumably left unchanged -- confirm)

Returns:
List[IndexingResult]: Results of the indexing operations
"""
def merge_or_upload_documents(self, documents: List[Dict], **kwargs) -> List[IndexingResult]:
"""
Merge documents if they already exist in the index; upload them if they don't.

Parameters:
- documents (List[Dict]): Documents to merge or upload

Returns:
List[IndexingResult]: Results of the indexing operations
"""
def delete_documents(self, documents: List[Dict], **kwargs) -> List[IndexingResult]:
"""
Delete documents from the index.

Parameters:
- documents (List[Dict]): Documents to delete (each must include the key field)

Returns:
List[IndexingResult]: Results of the deletion operations
"""
def index_documents(self, batch: IndexDocumentsBatch, **kwargs) -> List[IndexingResult]:
    """
    Execute a batch of document operations in a single request.

    Parameters:
    - batch (IndexDocumentsBatch): Batch of document operations

    Returns:
    List[IndexingResult]: Results of the batch operations
    """
# Create and manage batches of document operations for efficient processing.
class IndexDocumentsBatch:
    """Batch container for document operations (upload / delete / merge / merge-or-upload)."""

    def __init__(self) -> None:
        """Initialize an empty batch."""

    def add_upload_actions(self, documents: List[Dict]) -> None:
        """Add upload actions to the batch."""

    def add_delete_actions(self, documents: List[Dict]) -> None:
        """Add delete actions to the batch."""

    def add_merge_actions(self, documents: List[Dict]) -> None:
        """Add merge actions to the batch."""

    def add_merge_or_upload_actions(self, documents: List[Dict]) -> None:
        """Add merge-or-upload actions to the batch."""

    def __len__(self) -> int:
        """Get the number of actions in the batch."""

# Buffered sender for automatic batching and retry handling in high-volume scenarios.
class SearchIndexingBufferedSender:
    """High-throughput document indexing with automatic batching and retries."""

    def __init__(
        self,
        endpoint: str,
        index_name: str,
        credential: Union[AzureKeyCredential, TokenCredential],
        *,
        auto_flush_interval: int = 60,
        initial_batch_action_count: int = 512,
        max_retries_per_action: int = 3,
        max_retries: int = 3,
        **kwargs
    ) -> None:
        """
        Initialize buffered sender for high-throughput indexing.

        Parameters:
        - endpoint (str): Search service endpoint
        - index_name (str): Target index name
        - credential: Authentication credential
        - auto_flush_interval (int): Auto-flush interval in seconds
        - initial_batch_action_count (int): Initial batch size
        - max_retries_per_action (int): Max retries per document
        - max_retries (int): Max retries per batch
        """

    def upload_documents(self, documents: List[Dict], **kwargs) -> None:
        """Queue documents for upload."""

    def delete_documents(self, documents: List[Dict], **kwargs) -> None:
        """Queue documents for deletion."""

    def merge_documents(self, documents: List[Dict], **kwargs) -> None:
        """Queue documents for merge."""

    def merge_or_upload_documents(self, documents: List[Dict], **kwargs) -> None:
        """Queue documents for merge or upload."""

    def flush(self, timeout: Optional[int] = None, **kwargs) -> bool:
        """
        Flush all pending operations.

        Parameters:
        - timeout (int, optional): Timeout in seconds

        Returns:
        bool: True if all operations completed successfully
        """

    def close(self, **kwargs) -> None:
        """Close the sender and flush remaining operations."""

    def __enter__(self) -> "SearchIndexingBufferedSender": ...

    def __exit__(self, *args) -> None: ...

# Send custom HTTP requests to the search service.
def send_request(
    self,
    request: HttpRequest,
    *,
    stream: bool = False,
    **kwargs
) -> HttpResponse:
    """
    Send a custom HTTP request to the search service.

    Parameters:
    - request (HttpRequest): The HTTP request to send
    - stream (bool, keyword-only): Whether to stream the response

    Returns:
    HttpResponse: The HTTP response
    """
from azure.search.documents import SearchClient
# Example: basic client construction and text search.
from azure.core.credentials import AzureKeyCredential

client = SearchClient(
    endpoint="https://service.search.windows.net",
    index_name="hotels",
    credential=AzureKeyCredential("admin-key")
)

# Simple text search
results = client.search("luxury hotel", top=5)
for result in results:
    print(f"{result['name']}: {result['@search.score']}")

# Filtered search with facets
results = client.search(
    search_text="beach resort",
    filter="rating ge 4",
    facets=["category", "city"],
    order_by=["rating desc", "name"]
)

# Example: vector search with a pre-computed embedding.
from azure.search.documents.models import VectorizedQuery

embedding = [0.1, 0.2, 0.3, ...]  # Your computed vector
vector_query = VectorizedQuery(vector=embedding, k_nearest_neighbors=5, fields="content_vector")
results = client.search(
    search_text=None,
    vector_queries=[vector_query],
    select=["id", "title", "content"]
)

# Example: batched document operations.
from azure.search.documents import IndexDocumentsBatch

# Create batch
batch = IndexDocumentsBatch()
batch.add_upload_actions([
    {"id": "1", "title": "Document 1", "content": "Content 1"},
    {"id": "2", "title": "Document 2", "content": "Content 2"}
])
batch.add_delete_actions([{"id": "old-doc"}])

# Execute batch
results = client.index_documents(batch)
for result in results:
    print(f"Document {result.key}: {'succeeded' if result.status else 'failed'}")

# Example: high-throughput buffered indexing.
from azure.search.documents import SearchIndexingBufferedSender

with SearchIndexingBufferedSender(endpoint, index_name, credential) as sender:
    # Documents are automatically batched and sent
    sender.upload_documents(large_document_list)
    sender.merge_documents(updates)
    # Automatic flush on context exit

# Search results iterator
class SearchItemPaged:
"""Iterable over search results, with accessors for page-level metadata."""
def __iter__(self) -> Iterator[Dict[str, Any]]: ...
def by_page(self) -> Iterator[List[Dict[str, Any]]]: ...
# NOTE(review): count/coverage/facets presumably return None unless the
# corresponding option was requested on the query -- confirm.
def get_count(self) -> Optional[int]: ...
def get_coverage(self) -> Optional[float]: ...
def get_facets(self) -> Optional[Dict[str, List[Dict[str, Any]]]]: ...
# Indexing operation result
class IndexingResult:
"""Outcome of an indexing operation for a single document."""
key: str  # key of the document this result refers to
status: bool  # True if the operation succeeded
error_message: Optional[str]  # error detail (presumably None on success -- verify)
status_code: int  # HTTP-style status code for the operation
# Exception for oversized requests
class RequestEntityTooLargeError(Exception):
    """Raised when the request payload is too large."""

# Install with Tessl CLI
npx tessl i tessl/pypi-azure-search-documents