tessl/pypi-pysolr

Lightweight Python client for Apache Solr

—

Pending

Overview

Eval results

Files

Search Operations

Name: tessl/pypi-pysolr
Author: tessl

Advanced search functionality including basic queries, More Like This queries, term suggestions, and comprehensive result handling with pagination and metadata access.

Capabilities

Basic Search

Perform search queries against the Solr index with support for Lucene query syntax, faceting, highlighting, and other Solr features.

def search(self, q, search_handler=None, **kwargs):
    """
    Perform a search query against the Solr index.

    Parameters:
    - q (str): Lucene query string (e.g., "title:python", "*:*", "field:value AND other:term")
    - search_handler (str, optional): Custom search handler override
    - **kwargs: Additional Solr parameters:
        - start (int): Starting offset for results (default: 0)
        - rows (int): Number of results to return (default: 10)
        - sort (str): Sort specification (e.g., "score desc", "title asc")
        - fl (str): Fields to return (e.g., "id,title,score")
        - fq (str or list): Filter queries
        - facet (bool): Enable faceting
        - facet.field (str or list): Fields to facet on
        - hl (bool): Enable highlighting
        - hl.fl (str): Fields to highlight
        - spellcheck (bool): Enable spell checking
        - cursorMark (str): Cursor mark for deep paging

      Names that contain a dot (e.g. facet.field, hl.fl) are not valid
      Python keyword names, so pass them with dictionary unpacking:
      search(q, **{'facet.field': 'category'}).

    Returns:
    Results: Results object containing documents, metadata, and iteration support

    Raises:
    SolrError: If search fails or query is invalid
    """

Usage:

# Basic search
results = solr.search('python')
for doc in results:
    print(f"ID: {doc['id']}, Title: {doc.get('title', 'N/A')}")

# Advanced search with parameters
results = solr.search(
    'title:python AND content:tutorial',
    start=20,
    rows=50,
    sort='score desc',
    fl='id,title,content,score',
    fq=['category:programming', 'published:[2020-01-01T00:00:00Z TO *]']
)

# Search with faceting
# Solr parameter names that contain a dot (facet.field, facet.mincount) are
# not valid Python keyword names, so they are passed via dictionary unpacking.
# Underscore spellings such as facet_field are not Solr parameters.
results = solr.search(
    '*:*',
    facet=True,
    **{
        'facet.field': ['category', 'author'],
        'facet.mincount': 1,
    }
)
print(f"Category facets: {results.facets}")

# Search with highlighting (dotted names again passed via dictionary unpacking)
results = solr.search(
    'python tutorial',
    hl=True,
    **{
        'hl.fl': 'title,content',
        'hl.fragsize': 100,
    }
)
for doc in results:
    doc_id = doc['id']
    if doc_id in results.highlighting:
        highlights = results.highlighting[doc_id]
        print(f"Highlights for {doc_id}: {highlights}")

# Deep paging with cursor marks (manual loop; one search call per page)
cursor_mark = '*'
while True:
    results = solr.search(
        '*:*',
        sort='id asc',  # Required for cursor marks
        cursorMark=cursor_mark,
        rows=100
    )
    if not results.docs:
        # An empty page means every document has been processed
        break
    print(f"Processing {len(results.docs)} documents")
    if not results.nextCursorMark:
        break
    # Get next page using nextCursorMark
    cursor_mark = results.nextCursorMark

More Like This

Find documents similar to a given document or query using Solr's More Like This functionality.

def more_like_this(self, q, mltfl, handler="mlt", **kwargs):
    """
    Find documents similar to the provided query using More Like This.

    Parameters:
    - q (str): Base query to find similar documents (e.g., "id:doc_123")
    - mltfl (str): Fields to use for similarity analysis (comma-separated)
    - handler (str): MLT handler name (default: "mlt")
    - **kwargs: Additional MLT parameters:
        - mlt.mindf (int): Minimum document frequency
        - mlt.mintf (int): Minimum term frequency
        - mlt.maxqt (int): Maximum query terms
        - mlt.maxntp (int): Maximum number of tokens parsed
        - mlt.boost (bool): Boost terms by score
        - rows (int): Number of similar documents to return

      Names that contain a dot (e.g. mlt.mindf) are not valid Python
      keyword names, so pass them with dictionary unpacking:
      more_like_this(q, mltfl, **{'mlt.mindf': 2}).

    Returns:
    Results: Results object containing similar documents

    Raises:
    SolrError: If MLT query fails or handler is not configured
    """

Usage:

# Find documents similar to a specific document
similar = solr.more_like_this('id:doc_123', 'title,content')
print(f"Found {len(similar)} similar documents")
for doc in similar:
    print(f"Similar: {doc['id']} - {doc.get('title', 'N/A')}")

# MLT with parameters
# The mlt.* parameter names contain a dot, which is not allowed in a Python
# keyword name, so they are passed via dictionary unpacking. Underscore
# spellings such as mlt_mindf are not Solr parameters.
similar = solr.more_like_this(
    'id:doc_123',
    'title,content,tags',
    rows=20,
    **{
        'mlt.mindf': 2,
        'mlt.mintf': 1,
        'mlt.maxqt': 10,
    }
)

Term Suggestions

Get term suggestions and completions for fields using Solr's Terms component.

def suggest_terms(self, fields, prefix, handler="terms", **kwargs):
    """
    Get term suggestions for fields based on a prefix.

    Parameters:
    - fields (str or list): Field name(s) to get suggestions from
    - prefix (str): Prefix to match terms against
    - handler (str): Terms handler name (default: "terms")
    - **kwargs: Additional terms parameters:
        - terms.limit (int): Maximum number of terms to return
        - terms.mincount (int): Minimum term frequency
        - terms.maxcount (int): Maximum term frequency
        - terms.raw (bool): Return raw term data
        - terms.regex (str): Regular expression filter
        - terms.sort (str): Sort order ("count" or "index")

      Names that contain a dot (e.g. terms.limit) are not valid Python
      keyword names, so pass them with dictionary unpacking:
      suggest_terms(fields, prefix, **{'terms.limit': 10}).

    Returns:
    dict: Dictionary with field names as keys and lists of (term, count) tuples as values

    Raises:
    SolrError: If terms query fails or handler is not configured
    """

Usage:

# Basic term suggestions
suggestions = solr.suggest_terms('title', 'pyth')
for field, terms in suggestions.items():
    print(f"Suggestions for {field}:")
    for term, count in terms:
        print(f"  {term} ({count} docs)")

# Multiple fields with parameters
# The terms.* parameter names contain a dot, which is not allowed in a Python
# keyword name, so they are passed via dictionary unpacking. Underscore
# spellings such as terms_limit are not Solr parameters.
suggestions = solr.suggest_terms(
    ['title', 'tags'],
    'prog',
    **{
        'terms.limit': 10,
        'terms.mincount': 5,
        'terms.sort': 'count',
    }
)

Results Object

Comprehensive result wrapper providing access to documents, metadata, and iteration capabilities.

class Results:
    """
    Wrapper around a decoded Solr response.

    Exposes the returned documents, response metadata (hit count, query
    time, facets, highlighting, ...) and supports len() and iteration.
    """

    def __init__(self, decoded, next_page_query=None):
        """
        Initialize results from decoded Solr response.

        Parameters:
        - decoded (dict): Decoded JSON response from Solr
        - next_page_query (callable, optional): Function to get next page for cursor mark pagination
        """

    # Document access
    docs: list  # List of document dictionaries
    hits: int   # Total number of matching documents
    
    # Response metadata
    raw_response: dict      # Complete Solr response
    qtime: int             # Query execution time in milliseconds
    debug: dict            # Debug information (if requested)
    
    # Search features
    highlighting: dict     # Highlighting results by document ID
    facets: dict          # Facet counts and information
    spellcheck: dict      # Spell check suggestions
    stats: dict           # Field statistics (if requested)
    grouped: dict         # Grouping/field collapsing results
    
    # Pagination
    nextCursorMark: str   # Next cursor mark for deep paging (if applicable)
    
    def __len__(self):
        """Return number of documents in current results page."""
        
    def __iter__(self):
        """Iterate over all documents, automatically handling pagination if cursor marks are used."""

Usage:

results = solr.search('python', rows=10)

# Access documents
print(f"Found {results.hits} total documents")
print(f"Showing {len(results)} documents in this page")
print(f"Query took {results.qtime}ms")

# Iterate through documents
for doc in results:
    print(f"Document: {doc}")

# Access specific documents
# Guard the index: the page is empty when nothing matched the query
if results.docs:
    first_doc = results.docs[0]
    print(f"First document ID: {first_doc['id']}")

# Access metadata
if results.facets:
    print(f"Facet information: {results.facets}")

if results.highlighting:
    for doc_id, highlights in results.highlighting.items():
        print(f"Highlights for {doc_id}: {highlights}")

if results.spellcheck:
    suggestions = results.spellcheck.get('suggestions', [])
    print(f"Spell check suggestions: {suggestions}")

# Access raw response for custom processing
response_header = results.raw_response.get('responseHeader', {})
print(f"Response status: {response_header.get('status')}")

Error Handling

Search operations raise pysolr.SolrError when a request fails (for example on invalid query syntax, server errors, or timeouts), so wrap calls that may fail in a try/except block:

import pysolr

try:
    results = solr.search('invalid:query:syntax')
except pysolr.SolrError as e:
    print(f"Search failed: {e}")
    # Handle search errors (invalid syntax, server errors, etc.)

# The request timeout (in seconds) belongs to the connection and is set when
# the client is created. A `timeout` keyword passed to search() would be sent
# to Solr as an ordinary query parameter and would not limit the request time.
# Adjust the URL below to point at your own Solr core.
short_timeout_solr = pysolr.Solr('http://localhost:8983/solr/my_core', timeout=1)
try:
    results = short_timeout_solr.search('field:value')
except pysolr.SolrError as e:
    print(f"Search timed out: {e}")
    # Handle timeout errors

# Check for empty results
results = solr.search('rare_term')
if results.hits == 0:
    print("No documents found")
elif not results.docs:
    print("Total hits > 0 but no docs in current page")

Install with Tessl CLI