Microsoft Azure AI Search Client Library for Python providing comprehensive search, indexing, and AI-powered document processing capabilities.
—
Quality: Pending — best-practices review has not yet been performed.
Impact: Pending — no eval scenarios have been run.
This document covers the comprehensive type system for Azure Search Documents, including search request/response models, index schema definitions, AI enrichment configurations, and all enumeration types. These types provide complete type safety and IntelliSense support for all Azure Search operations.
Types for constructing and executing search queries with various modes and options.
# --- Query construction enumerations ---

class QueryType(str, Enum):
    """Types of search queries."""

    SIMPLE = "simple"      # Simple Lucene query syntax
    FULL = "full"          # Full Lucene query syntax
    SEMANTIC = "semantic"  # Semantic search with AI ranking


class SearchMode(str, Enum):
    """Search behavior when a query contains multiple terms."""

    ANY = "any"  # Match any search term (OR)
    ALL = "all"  # Match all search terms (AND)


class AutocompleteMode(str, Enum):
    """Autocomplete suggestion behavior."""

    ONE_TERM = "oneTerm"                          # Complete one term
    TWO_TERMS = "twoTerms"                        # Complete up to two terms
    ONE_TERM_WITH_CONTEXT = "oneTermWithContext"  # One term with context


class ScoringStatistics(str, Enum):
    """Statistics used for scoring."""

    LOCAL = "local"    # Use local statistics
    GLOBAL = "global"  # Use global statistics


# --- Vector search query types: vector similarity search and hybrid query scenarios ---
class VectorQuery:
    """Base class for vector queries."""

    k_nearest_neighbors: Optional[int] = None  # Number of nearest neighbors to return
    fields: str                                # Vector field(s) to search
    exhaustive: Optional[bool] = None          # Force exhaustive (non-ANN) search
    oversampling: Optional[float] = None       # Oversampling factor


class VectorizedQuery(VectorQuery):
    """Query with pre-computed vector embeddings."""

    vector: List[float]  # The embedding to search with

    def __init__(
        self,
        *,
        vector: List[float],
        k_nearest_neighbors: Optional[int] = None,
        fields: str,
        exhaustive: Optional[bool] = None,
        oversampling: Optional[float] = None,
    ): ...


class VectorizableTextQuery(VectorQuery):
    """Text query that will be vectorized by the service."""

    text: str  # Raw text; the service computes the embedding

    def __init__(
        self,
        *,
        text: str,
        k_nearest_neighbors: Optional[int] = None,
        fields: str,
        exhaustive: Optional[bool] = None,
        oversampling: Optional[float] = None,
    ): ...


class VectorFilterMode(str, Enum):
    """How to apply filters in vector search."""

    PRE_FILTER = "preFilter"    # Filter before vector search
    POST_FILTER = "postFilter"  # Filter after vector search


# --- Semantic search types: AI-powered semantic search with natural language understanding ---
class SemanticSearchResultsType(str, Enum):
    """Types of semantic search results."""

    BASE_RESULTS = "baseResults"          # Standard results
    RERANKED_RESULTS = "rerankedResults"  # AI-reranked results


class QueryAnswerType(str, Enum):
    """Types of extractive answers."""

    NONE = "none"              # No answers
    EXTRACTIVE = "extractive"  # Extract answers from content


class QueryCaptionType(str, Enum):
    """Types of semantic captions."""

    NONE = "none"              # No captions
    EXTRACTIVE = "extractive"  # Extract relevant passages


class SemanticErrorMode(str, Enum):
    """Error handling for semantic search."""

    PARTIAL = "partial"  # Return partial results
    FAIL = "fail"        # Fail on semantic errors


class SemanticErrorReason(str, Enum):
    """Reasons for semantic search errors."""

    CAPACITY_OVERLOADED = "capacityOverloaded"
    TRANSIENT = "transient"


class QueryAnswerResult:
    """Extractive answer from semantic search."""

    text: Optional[str] = None
    highlights: Optional[str] = None
    score: Optional[float] = None


class QueryCaptionResult:
    """Semantic caption highlighting relevant content."""

    text: Optional[str] = None
    highlights: Optional[str] = None


# --- Document operation types: indexing, updates, and batch operations ---
class IndexAction(str, Enum):
    """Types of document operations."""

    UPLOAD = "upload"                  # Add or replace document
    MERGE = "merge"                    # Update existing document fields
    MERGE_OR_UPLOAD = "mergeOrUpload"  # Merge if exists, upload if not
    DELETE = "delete"                  # Delete document


class IndexingResult:
    """Result of a document indexing operation."""

    key: str                             # Document key
    status: bool                         # Success/failure status
    error_message: Optional[str] = None  # Error message if failed
    status_code: int                     # HTTP status code

    def succeeded(self) -> bool:
        """Return True when the service reported success and the HTTP status is below 400.

        ``bool(...)`` guarantees the annotated return type even if ``status``
        is a falsy non-bool value.
        """
        return bool(self.status) and self.status_code < 400


# --- Index schema types: defining search index schemas and field configurations ---
class SearchFieldDataType(str, Enum):
    """EDM data types for search index fields."""

    STRING = "Edm.String"
    INT32 = "Edm.Int32"
    INT64 = "Edm.Int64"
    DOUBLE = "Edm.Double"
    # SINGLE added for consistency: the field-definition example in this
    # document builds vector fields as Collection(Edm.Single).
    SINGLE = "Edm.Single"
    BOOLEAN = "Edm.Boolean"
    DATE_TIME_OFFSET = "Edm.DateTimeOffset"
    GEOGRAPHY_POINT = "Edm.GeographyPoint"
    COMPLEX_TYPE = "Edm.ComplexType"

    @staticmethod
    def Collection(item_type: "SearchFieldDataType") -> str:
        """Return the ``Collection(...)`` type string wrapping *item_type*."""
        return f"Collection({item_type.value})"


class SearchField:
    """Definition of a field in a search index."""

    name: str
    type: SearchFieldDataType
    key: bool = False                # Whether this field is the document key
    retrievable: bool = True
    searchable: bool = False
    filterable: bool = False
    sortable: bool = False
    facetable: bool = False
    analyzer_name: Optional[str] = None
    search_analyzer_name: Optional[str] = None
    index_analyzer_name: Optional[str] = None
    synonym_map_names: Optional[List[str]] = None
    fields: Optional[List["SearchField"]] = None  # For complex types
    vector_search_dimensions: Optional[int] = None
    vector_search_profile_name: Optional[str] = None

    def __init__(
        self,
        *,
        name: str,
        type: SearchFieldDataType,
        **kwargs,
    ): ...
# Helper functions for common field types.
# NOTE: these are interface stubs — bodies are intentionally empty.

def SearchableField(
    name: str,
    *,
    collection: bool = False,
    key: bool = False,
    retrievable: bool = True,
    sortable: bool = False,
    filterable: bool = False,
    facetable: bool = False,
    analyzer_name: Optional[str] = None,
    search_analyzer_name: Optional[str] = None,
    index_analyzer_name: Optional[str] = None,
    synonym_map_names: Optional[List[str]] = None,
) -> SearchField:
    """Create a searchable (full-text) field."""


def SimpleField(
    name: str,
    type: SearchFieldDataType,
    *,
    key: bool = False,
    filterable: bool = False,
    sortable: bool = False,
    facetable: bool = False,
    retrievable: bool = True,
) -> SearchField:
    """Create a simple (non-searchable) field."""


def ComplexField(
    name: str,
    *,
    fields: List[SearchField],
    collection: bool = False,
    retrievable: bool = True,
) -> SearchField:
    """Create a complex field with nested sub-fields."""
class SearchIndex:
    """Complete search index definition.

    Referenced types (ScoringProfile, CorsOptions, Suggester, analyzers,
    encryption, similarity, semantic and vector search) are defined in the
    other sections of this document.
    """

    name: str
    fields: List[SearchField]
    scoring_profiles: Optional[List[ScoringProfile]] = None
    default_scoring_profile: Optional[str] = None
    cors_options: Optional[CorsOptions] = None
    suggesters: Optional[List[Suggester]] = None
    analyzers: Optional[List[LexicalAnalyzer]] = None
    tokenizers: Optional[List[LexicalTokenizer]] = None
    token_filters: Optional[List[TokenFilter]] = None
    char_filters: Optional[List[CharFilter]] = None
    encryption_key: Optional[SearchResourceEncryptionKey] = None
    similarity: Optional[SimilarityAlgorithm] = None
    semantic_search: Optional[SemanticSearch] = None
    vector_search: Optional[VectorSearch] = None
    e_tag: Optional[str] = None  # Concurrency-control tag


# --- Vector search configuration types: vector similarity and hybrid retrieval ---
class VectorSearch:
    """Vector search configuration for an index."""

    algorithms: Optional[List[VectorSearchAlgorithmConfiguration]] = None
    profiles: Optional[List[VectorSearchProfile]] = None
    vectorizers: Optional[List[VectorSearchVectorizer]] = None
    compressions: Optional[List[VectorSearchCompression]] = None


class VectorSearchProfile:
    """Named vector search configuration profile."""

    name: str
    algorithm_configuration_name: str
    vectorizer_name: Optional[str] = None
    compression_name: Optional[str] = None


# Enumerations are declared before VectorSearchAlgorithmConfiguration so the
# class can reference them without a forward reference.
class VectorSearchAlgorithmKind(str, Enum):
    """Types of vector search algorithms."""

    HNSW = "hnsw"                     # Hierarchical Navigable Small World
    EXHAUSTIVE_KNN = "exhaustiveKnn"  # Exhaustive k-nearest neighbors


class VectorSearchAlgorithmMetric(str, Enum):
    """Distance metrics for vector similarity."""

    COSINE = "cosine"          # Cosine similarity
    EUCLIDEAN = "euclidean"    # Euclidean distance
    DOT_PRODUCT = "dotProduct" # Dot product


class VectorSearchAlgorithmConfiguration:
    """Vector search algorithm configuration."""

    name: str
    kind: VectorSearchAlgorithmKind

    def __init__(
        self,
        *,
        name: str,
        kind: VectorSearchAlgorithmKind,
        **kwargs,
    ): ...
class VectorSearchCompression:
    """Vector compression configuration."""

    name: str
    kind: str  # Discriminator string set by subclasses


class BinaryQuantizationCompression(VectorSearchCompression):
    """Binary quantization compression."""

    kind: str = "binaryQuantization"
    rescore: Optional[bool] = None


class VectorSearchCompressionTarget(str, Enum):
    """Compression targets."""

    SIZE = "size"    # Optimize for storage size
    SPEED = "speed"  # Optimize for query speed


class VectorSearchVectorizer:
    """Base vectorizer configuration."""

    name: str
    kind: VectorSearchVectorizerKind


class VectorSearchVectorizerKind(str, Enum):
    """Types of vectorizers."""

    AZURE_OPEN_AI = "azureOpenAI"
    WEB_API = "webApi"


class AzureOpenAIVectorizer(VectorSearchVectorizer):
    """Azure OpenAI embedding vectorizer."""

    kind: VectorSearchVectorizerKind = VectorSearchVectorizerKind.AZURE_OPEN_AI
    azure_open_ai_parameters: Optional[AzureOpenAIVectorizerParameters] = None


class AzureOpenAIVectorizerParameters:
    """Parameters for the Azure OpenAI vectorizer."""

    resource_uri: Optional[str] = None
    deployment_id: Optional[str] = None
    api_key: Optional[str] = None
    model_name: Optional[AzureOpenAIModelName] = None


class AzureOpenAIModelName(str, Enum):
    """Azure OpenAI embedding models."""

    TEXT_EMBEDDING_ADA_002 = "text-embedding-ada-002"
    TEXT_EMBEDDING_3_LARGE = "text-embedding-3-large"
    TEXT_EMBEDDING_3_SMALL = "text-embedding-3-small"


class WebApiVectorizer(VectorSearchVectorizer):
    """Custom web API vectorizer."""

    kind: VectorSearchVectorizerKind = VectorSearchVectorizerKind.WEB_API
    web_api_parameters: Optional[WebApiVectorizerParameters] = None


class WebApiVectorizerParameters:
    """Parameters for the web API vectorizer."""

    uri: Optional[str] = None
    http_method: Optional[str] = None
    http_headers: Optional[Dict[str, str]] = None
    auth_resource_id: Optional[str] = None


# --- AI enrichment types: cognitive skills and AI-powered content enrichment ---
class SearchIndexerSkill:
    """Base class for indexer skills."""

    odata_type: str  # OData type discriminator, set by each subclass
    name: Optional[str] = None
    description: Optional[str] = None
    context: Optional[str] = None
    inputs: List[InputFieldMappingEntry]
    outputs: List[OutputFieldMappingEntry]


class InputFieldMappingEntry:
    """Input field mapping for skills."""

    name: str
    source: str
    source_context: Optional[str] = None
    inputs: Optional[List["InputFieldMappingEntry"]] = None  # Nested inputs


class OutputFieldMappingEntry:
    """Output field mapping for skills."""

    name: str
    target_name: str


class EntityRecognitionSkill(SearchIndexerSkill):
    """Entity recognition skill."""

    odata_type: str = "#Microsoft.Skills.Text.EntityRecognitionSkill"
    categories: Optional[List[str]] = None
    default_language_code: Optional[str] = None
    include_typeless_entities: Optional[bool] = None
    minimum_precision: Optional[float] = None


class KeyPhraseExtractionSkill(SearchIndexerSkill):
    """Key phrase extraction skill."""

    odata_type: str = "#Microsoft.Skills.Text.KeyPhraseExtractionSkill"
    default_language_code: Optional[str] = None
    max_key_phrase_count: Optional[int] = None


class LanguageDetectionSkill(SearchIndexerSkill):
    """Language detection skill."""

    odata_type: str = "#Microsoft.Skills.Text.LanguageDetectionSkill"


class SentimentSkill(SearchIndexerSkill):
    """Sentiment analysis skill."""

    odata_type: str = "#Microsoft.Skills.Text.SentimentSkill"
    default_language_code: Optional[str] = None
    model_version: Optional[SentimentSkillVersion] = None


class SentimentSkillVersion(str, Enum):
    """Sentiment skill versions."""

    VERSION_1 = "1"
    VERSION_3 = "3"


class OcrSkill(SearchIndexerSkill):
    """OCR text extraction skill."""

    odata_type: str = "#Microsoft.Skills.Vision.OcrSkill"
    text_extraction_algorithm: Optional[str] = None
    default_language_code: Optional[str] = None
    should_detect_orientation: Optional[bool] = None


class ImageAnalysisSkill(SearchIndexerSkill):
    """Image analysis skill."""

    odata_type: str = "#Microsoft.Skills.Vision.ImageAnalysisSkill"
    default_language_code: Optional[str] = None
    visual_features: Optional[List[VisualFeature]] = None
    details: Optional[List[str]] = None


class VisualFeature(str, Enum):
    """Visual features for image analysis."""

    ADULT = "adult"
    BRANDS = "brands"
    CATEGORIES = "categories"
    COLOR = "color"
    DESCRIPTION = "description"
    FACES = "faces"
    OBJECTS = "objects"
    TAGS = "tags"


class WebApiSkill(SearchIndexerSkill):
    """Custom web API skill."""

    odata_type: str = "#Microsoft.Skills.Custom.WebApiSkill"
    uri: str
    http_method: Optional[str] = None
    http_headers: Optional[Dict[str, str]] = None
    timeout: Optional[str] = None
    batch_size: Optional[int] = None
    degree_of_parallelism: Optional[int] = None


class AzureOpenAIEmbeddingSkill(SearchIndexerSkill):
    """Azure OpenAI embedding skill."""

    odata_type: str = "#Microsoft.Skills.Text.AzureOpenAIEmbeddingSkill"
    resource_uri: Optional[str] = None
    api_key: Optional[str] = None
    deployment_id: Optional[str] = None
    model_name: Optional[AzureOpenAIModelName] = None
    dimensions: Optional[int] = None


class ConditionalSkill(SearchIndexerSkill):
    """Conditional logic skill."""

    odata_type: str = "#Microsoft.Skills.Util.ConditionalSkill"


class DocumentExtractionSkill(SearchIndexerSkill):
    """Document extraction skill."""

    odata_type: str = "#Microsoft.Skills.Util.DocumentExtractionSkill"
    parsing_mode: Optional[str] = None
    data_to_extract: Optional[str] = None
    configuration: Optional[Dict[str, Any]] = None


class MergeSkill(SearchIndexerSkill):
    """Text merging skill."""

    odata_type: str = "#Microsoft.Skills.Text.MergeSkill"
    insert_pre_tag: Optional[str] = None
    insert_post_tag: Optional[str] = None


class ShaperSkill(SearchIndexerSkill):
    """Data shaping skill."""

    odata_type: str = "#Microsoft.Skills.Util.ShaperSkill"


class SplitSkill(SearchIndexerSkill):
    """Text splitting skill."""

    odata_type: str = "#Microsoft.Skills.Text.SplitSkill"
    text_split_mode: Optional[TextSplitMode] = None
    maximum_page_length: Optional[int] = None
    default_language_code: Optional[str] = None


class TextSplitMode(str, Enum):
    """Text splitting modes."""

    PAGES = "pages"
    SENTENCES = "sentences"


class TextTranslationSkill(SearchIndexerSkill):
    """Text translation skill."""

    odata_type: str = "#Microsoft.Skills.Text.TranslationSkill"
    default_to_language_code: str
    default_from_language_code: Optional[str] = None
    suggested_from: Optional[TextTranslationSkillLanguage] = None


class TextTranslationSkillLanguage(str, Enum):
    """Translation language codes (abbreviated list)."""

    EN = "en"
    ES = "es"
    FR = "fr"
    DE = "de"
    # ... additional language codes


# --- Text analysis types: analyzers, tokenizers, and linguistic processing ---
class LexicalAnalyzer:
    """Base analyzer class."""

    odata_type: str  # OData type discriminator, set by each subclass
    name: str


class StandardAnalyzer(LexicalAnalyzer):
    """Standard Lucene analyzer."""

    odata_type: str = "#Microsoft.Azure.Search.StandardAnalyzer"
    max_token_length: Optional[int] = None
    stopwords: Optional[List[str]] = None


class StopAnalyzer(LexicalAnalyzer):
    """Stop word analyzer."""

    odata_type: str = "#Microsoft.Azure.Search.StopAnalyzer"
    stopwords: Optional[List[str]] = None


class PatternAnalyzer(LexicalAnalyzer):
    """Pattern-based analyzer."""

    odata_type: str = "#Microsoft.Azure.Search.PatternAnalyzer"
    pattern: Optional[str] = None
    flags: Optional[str] = None
    stopwords: Optional[List[str]] = None


class CustomAnalyzer(LexicalAnalyzer):
    """Custom analyzer definition."""

    odata_type: str = "#Microsoft.Azure.Search.CustomAnalyzer"
    tokenizer_name: str
    token_filters: Optional[List[str]] = None
    char_filters: Optional[List[str]] = None
class LexicalTokenizer:
    """Base tokenizer class."""

    odata_type: str  # OData type discriminator, set by each subclass
    name: str


class StandardTokenizer(LexicalTokenizer):
    """Standard Lucene tokenizer."""

    odata_type: str = "#Microsoft.Azure.Search.StandardTokenizer"
    max_token_length: Optional[int] = None


class KeywordTokenizer(LexicalTokenizer):
    """Keyword tokenizer (emits the whole input as one token)."""

    odata_type: str = "#Microsoft.Azure.Search.KeywordTokenizer"
    buffer_size: Optional[int] = None
class TokenFilter:
    """Base token filter class."""

    odata_type: str  # OData type discriminator, set by each subclass
    name: str


class LowercaseTokenFilter(TokenFilter):
    """Lowercase token filter."""

    odata_type: str = "#Microsoft.Azure.Search.LowercaseTokenFilter"


class StopwordsTokenFilter(TokenFilter):
    """Stop words token filter."""

    odata_type: str = "#Microsoft.Azure.Search.StopwordsTokenFilter"
    stopwords: Optional[List[str]] = None           # Explicit stopword list
    stopwords_list: Optional[StopwordsList] = None  # Or a predefined list
    ignore_case: Optional[bool] = None
    remove_trailing: Optional[bool] = None


class StopwordsList(str, Enum):
    """Predefined stopwords lists."""

    ARABIC = "arabic"
    ARMENIAN = "armenian"
    BASQUE = "basque"
    BRAZILIAN = "brazilian"
    BULGARIAN = "bulgarian"
    CATALAN = "catalan"
    CZECH = "czech"
    DANISH = "danish"
    DUTCH = "dutch"
    ENGLISH = "english"
    FINNISH = "finnish"
    FRENCH = "french"
    GALICIAN = "galician"
    GERMAN = "german"
    GREEK = "greek"
    HINDI = "hindi"
    HUNGARIAN = "hungarian"
    INDONESIAN = "indonesian"
    IRISH = "irish"
    ITALIAN = "italian"
    LATVIAN = "latvian"
    NORWEGIAN = "norwegian"
    PERSIAN = "persian"
    PORTUGUESE = "portuguese"
    ROMANIAN = "romanian"
    RUSSIAN = "russian"
    SORANI = "sorani"
    SPANISH = "spanish"
    SWEDISH = "swedish"
    THAI = "thai"
    TURKISH = "turkish"
class CharFilter:
    """Base character filter class."""

    odata_type: str  # OData type discriminator, set by each subclass
    name: str


class MappingCharFilter(CharFilter):
    """Character mapping filter."""

    odata_type: str = "#Microsoft.Azure.Search.MappingCharFilter"
    mappings: List[str]


class PatternReplaceCharFilter(CharFilter):
    """Pattern replacement character filter."""

    odata_type: str = "#Microsoft.Azure.Search.PatternReplaceCharFilter"
    pattern: str
    replacement: str


# --- Relevance types: search result ranking and similarity algorithms ---
class SimilarityAlgorithm:
    """Base similarity algorithm."""

    odata_type: str  # OData type discriminator, set by each subclass


class BM25SimilarityAlgorithm(SimilarityAlgorithm):
    """BM25 similarity algorithm."""

    odata_type: str = "#Microsoft.Azure.Search.BM25Similarity"
    k1: Optional[float] = None  # BM25 k1 parameter
    b: Optional[float] = None   # BM25 b parameter


class ClassicSimilarityAlgorithm(SimilarityAlgorithm):
    """Classic TF-IDF similarity."""

    odata_type: str = "#Microsoft.Azure.Search.ClassicSimilarity"


class ScoringProfile:
    """Custom scoring profile."""

    name: str
    text_weights: Optional[TextWeights] = None
    functions: Optional[List[ScoringFunction]] = None
    function_aggregation: Optional[FunctionAggregation] = None


class TextWeights:
    """Text field weights for scoring."""

    weights: Dict[str, float]  # Field name -> weight


class FunctionAggregation(str, Enum):
    """Function aggregation modes."""

    SUM = "sum"
    AVERAGE = "average"
    MINIMUM = "minimum"
    MAXIMUM = "maximum"
    FIRST_MATCHING = "firstMatching"
class ScoringFunction:
    """Base scoring function."""

    type: str  # Discriminator string set by subclasses
    field_name: str
    boost: float
    interpolation: Optional[str] = None


# NOTE(review): the *ScoringParameters types referenced below are not defined
# in this document — confirm against azure.search.documents.indexes.models.

class DistanceScoringFunction(ScoringFunction):
    """Distance-based scoring function."""

    type: str = "distance"
    distance: DistanceScoringParameters


class FreshnessScoringFunction(ScoringFunction):
    """Freshness-based scoring function."""

    type: str = "freshness"
    freshness: FreshnessScoringParameters


class MagnitudeScoringFunction(ScoringFunction):
    """Magnitude-based scoring function."""

    type: str = "magnitude"
    magnitude: MagnitudeScoringParameters


class TagScoringFunction(ScoringFunction):
    """Tag-based scoring function."""

    type: str = "tag"
    tag: TagScoringParameters


# --- Usage example: defining index fields ---
from azure.search.documents.indexes.models import SearchField, SearchFieldDataType
# Vector field for embeddings
vector_field = SearchField(
    name="content_vector",
    # NOTE(review): member names follow this document's enum (SINGLE/STRING);
    # the shipped SDK spells them "Single"/"String" — confirm before copying.
    type=SearchFieldDataType.Collection(SearchFieldDataType.SINGLE),
    vector_search_dimensions=1536,
    vector_search_profile_name="my-vector-config",
)

# Complex type with nested fields
address_field = ComplexField(
    name="address",
    fields=[
        SimpleField("street", SearchFieldDataType.STRING),
        SimpleField("city", SearchFieldDataType.STRING, filterable=True),
        SimpleField("zipCode", SearchFieldDataType.STRING, filterable=True),
    ],
)

# Collection of complex types
addresses_field = ComplexField(
    name="addresses",
    collection=True,
    fields=[
        SimpleField("type", SearchFieldDataType.STRING),
        SimpleField("street", SearchFieldDataType.STRING),
        SimpleField("city", SearchFieldDataType.STRING),
    ],
)

# --- Usage example: search queries ---
from azure.search.documents.models import QueryType, SearchMode, VectorizedQuery
# Semantic search query (assumes `client` is a configured SearchClient)
results = client.search(
    search_text="find documents about machine learning",
    query_type=QueryType.SEMANTIC,
    semantic_configuration_name="my-semantic-config",
    query_answer="extractive",
    query_caption="extractive",
    top=10,
)

# Hybrid vector + text search
vector_query = VectorizedQuery(
    vector=[0.1, 0.2, 0.3, ...],  # embedding truncated for illustration
    k_nearest_neighbors=5,
    fields="content_vector",
)
results = client.search(
    search_text="machine learning algorithms",
    vector_queries=[vector_query],
    search_mode=SearchMode.ALL,
    top=20,
)

# --- Usage example: batch document operations ---
from azure.search.documents import IndexDocumentsBatch
from azure.search.documents.models import IndexAction

# Create a batch with mixed operations
batch = IndexDocumentsBatch()

# Add documents
batch.add_upload_actions([
    {"id": "1", "title": "New Document", "content": "Content here"},
])

# Update documents
batch.add_merge_actions([
    {"id": "2", "title": "Updated Title"},
])

# Delete documents
batch.add_delete_actions([
    {"id": "3"},
])

# Execute the batch and inspect per-document results
results = client.index_documents(batch)
for result in results:
    if result.succeeded():
        print(f"Document {result.key} processed successfully")
    else:
        print(f"Error processing {result.key}: {result.error_message}")

# Install with Tessl CLI:
npx tessl i tessl/pypi-azure-search-documents