CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-pysolr

Lightweight Python client for Apache Solr

Pending
Overview
Eval results
Files

docs/core-client.md

Core Client Operations

Essential Solr operations that form the foundation for interacting with Solr servers. These operations handle client initialization, health monitoring, document management, and index maintenance.

Capabilities

Client Initialization

Create and configure a Solr client instance with connection settings, authentication, timeouts, and custom handlers.

class Solr:
    def __init__(self, url, decoder=None, encoder=None, timeout=60, results_cls=Results, 
                 search_handler="select", use_qt_param=False, always_commit=False, 
                 auth=None, verify=True, session=None):
        """
        Initialize a Solr client.

        Parameters:
        - url (str): Solr server URL (e.g., 'http://localhost:8983/solr/core_name')
        - decoder (json.JSONDecoder, optional): Custom JSON decoder instance
        - encoder (json.JSONEncoder, optional): Custom JSON encoder instance  
        - timeout (int): Request timeout in seconds (default: 60)
        - results_cls (type): Results class for search responses (default: Results)
        - search_handler (str): Default search handler name (default: "select")
        - use_qt_param (bool): Use qt parameter instead of handler path (default: False)
        - always_commit (bool): Auto-commit all update operations (default: False)
        - auth (tuple or requests auth object, optional): HTTP authentication
        - verify (bool or str): Enable SSL certificate verification, or a path to a CA bundle file to verify against (default: True)
        - session (requests.Session, optional): Custom requests session
        """

Usage:

import pysolr

# Basic client
solr = pysolr.Solr('http://localhost:8983/solr/my_core')

# Client with timeout and authentication
solr = pysolr.Solr(
    'https://solr.example.com/solr/my_core',
    timeout=30,
    auth=('username', 'password'),
    always_commit=True
)

# Client with custom session and SSL settings
import requests
session = requests.Session()
session.headers.update({'User-Agent': 'MyApp/1.0'})

solr = pysolr.Solr(
    'https://solr.example.com/solr/my_core',
    session=session,
    verify='/path/to/ca-bundle.crt'
)

Health Check

Test connectivity and server health with ping operations.

def ping(self, handler="admin/ping", **kwargs):
    """
    Send a ping request to test server connectivity.

    Parameters:
    - handler (str): Ping handler path (default: "admin/ping")
    - **kwargs: Additional parameters passed to Solr

    Returns:
    str: Server response content

    Raises:
    SolrError: If ping fails or server is unreachable
    """

Usage:

try:
    response = solr.ping()
    print("Solr server is healthy")
except pysolr.SolrError as e:
    print(f"Solr server is down: {e}")

Document Indexing

Add or update documents in the Solr index with support for batch operations, field updates, and commit control.

def add(self, docs, boost=None, fieldUpdates=None, commit=None, softCommit=False, 
        commitWithin=None, waitFlush=None, waitSearcher=None, overwrite=None, 
        handler="update", min_rf=None):
    """
    Add or update documents in the index.

    Parameters:
    - docs (list or dict): Document(s) to index. Each document is a dict with field names as keys
    - boost (dict, optional): Per-field boost values {"field_name": boost_value}
    - fieldUpdates (dict, optional): Field update operations {"field": "set"/"add"/"inc"}
    - commit (bool, optional): Force commit after operation (overrides always_commit)
    - softCommit (bool): Perform soft commit (default: False)
    - commitWithin (int, optional): Auto-commit within specified milliseconds
    - waitFlush (bool, optional): Wait for flush to complete
    - waitSearcher (bool, optional): Wait for new searcher
    - overwrite (bool, optional): Allow document overwrites (default: True)
    - handler (str): Update handler path (default: "update")
    - min_rf (int, optional): Minimum replication factor for SolrCloud

    Returns:
    str: Server response content

    Raises:
    SolrError: If indexing fails
    ValueError: If docs parameter is invalid
    """

Usage:

# Single document
solr.add({
    "id": "doc_1",  
    "title": "Sample Document",
    "content": "This is the document content.",
    "category": "example"
})

# Multiple documents
docs = [
    {"id": "doc_1", "title": "First Document", "content": "Content 1"},
    {"id": "doc_2", "title": "Second Document", "content": "Content 2"}  
]
solr.add(docs)

# With field boosts
solr.add(
    {"id": "doc_1", "title": "Important Document", "content": "Key content"},
    boost={"title": 2.0, "content": 1.5}
)

# Atomic field updates  
solr.add(
    {"id": "existing_doc", "category": "updated"},
    fieldUpdates={"category": "set"}
)

# With commit control
solr.add(docs, commit=True)  # Force immediate commit
solr.add(docs, commitWithin=5000)  # Auto-commit within 5 seconds

Document Deletion

Remove documents from the index by ID or query with commit control options.

def delete(self, id=None, q=None, commit=None, softCommit=False, 
           waitFlush=None, waitSearcher=None, handler="update"):
    """
    Delete documents from the index.

    Parameters:
    - id (str, list, or None): Document ID(s) to delete. Can be single ID or list of IDs
    - q (str or None): Lucene query to select documents for deletion
    - commit (bool, optional): Force commit after deletion (overrides always_commit)
    - softCommit (bool): Perform soft commit (default: False)
    - waitFlush (bool, optional): Wait for flush to complete
    - waitSearcher (bool, optional): Wait for new searcher
    - handler (str): Update handler path (default: "update")

    Returns:
    str: Server response content

    Raises:
    SolrError: If deletion fails
    ValueError: If neither id nor q is specified, or both are specified
    """

Usage:

# Delete by single ID
solr.delete(id='doc_1') 

# Delete by multiple IDs
solr.delete(id=['doc_1', 'doc_2', 'doc_3'])

# Delete by query
solr.delete(q='category:obsolete')
solr.delete(q='*:*')  # Delete all documents

# With commit control
solr.delete(id='doc_1', commit=True)

Index Commit

Force Solr to write pending changes to disk and make them searchable.

def commit(self, softCommit=False, waitFlush=None, waitSearcher=None, 
           expungeDeletes=None, handler="update"):
    """
    Force Solr to commit pending changes to disk.

    Parameters:
    - softCommit (bool): Perform soft commit (visible but not durable) (default: False)
    - waitFlush (bool, optional): Wait for flush to complete before returning
    - waitSearcher (bool, optional): Wait for new searcher before returning
    - expungeDeletes (bool, optional): Expunge deleted documents during commit
    - handler (str): Update handler path (default: "update")

    Returns:
    str: Server response content

    Raises:
    SolrError: If commit fails
    """

Usage:

# Standard commit
solr.commit()

# Soft commit (fast, visible immediately but not durable)
solr.commit(softCommit=True)

# Hard commit with deleted document cleanup
solr.commit(expungeDeletes=True)

# Synchronous commit (wait for completion)
solr.commit(waitFlush=True, waitSearcher=True)

Index Optimization

Optimize the Solr index by reducing the number of segments, improving query performance.

def optimize(self, commit=True, waitFlush=None, waitSearcher=None, 
             maxSegments=None, handler="update"):
    """
    Optimize the Solr index by merging segments.

    Parameters:
    - commit (bool): Commit after optimization (default: True)
    - waitFlush (bool, optional): Wait for flush to complete
    - waitSearcher (bool, optional): Wait for new searcher
    - maxSegments (int, optional): Maximum number of segments to merge down to
    - handler (str): Update handler path (default: "update")

    Returns:
    str: Server response content

    Raises:
    SolrError: If optimization fails
    """

Usage:

# Basic optimization
solr.optimize()

# Optimize to specific segment count
solr.optimize(maxSegments=1)

# Asynchronous optimization
solr.optimize(waitFlush=False, waitSearcher=False)

Content Extraction

Extract content and metadata from files using Apache Tika integration for rich document processing.

def extract(self, file_obj, extractOnly=True, handler="update/extract", **kwargs):
    """
    Extract content and metadata from files using Apache Tika.

    Parameters:
    - file_obj (file-like object): File object with a 'name' attribute to extract from
    - extractOnly (bool): If True, only extract without indexing (default: True)
    - handler (str): Extract handler path (default: "update/extract")
    - **kwargs: Additional parameters passed to Solr ExtractingRequestHandler

    Returns:
    dict: Dictionary containing extracted content and metadata:
        - contents: Extracted full-text content (if applicable)
        - metadata: Key-value pairs of extracted metadata

    Raises:
    ValueError: If file_obj doesn't have a 'name' attribute
    SolrError: If extraction fails or server error occurs
    """

Usage:

# Extract content from a PDF file
with open('document.pdf', 'rb') as pdf_file:
    extracted = solr.extract(pdf_file)
    print("Content:", extracted.get('contents', 'No content'))
    print("Metadata:", extracted.get('metadata', {}))

# Extract and index in one step
with open('document.docx', 'rb') as doc_file:
    result = solr.extract(
        doc_file, 
        extractOnly=False,  # Index the document
        literal_id='doc_123',  # Provide document ID
        literal_title='Important Document'  # Add custom fields
    )

Types

class SolrError(Exception):
    """Exception raised for Solr-related errors including network issues, timeouts, and server errors."""
    pass

Install with Tessl CLI

npx tessl i tessl/pypi-pysolr

docs

admin-operations.md

core-client.md

document-processing.md

index.md

search-operations.md

solrcloud-support.md

utilities.md

tile.json