Lightweight Python client for Apache Solr
—
Essential Solr operations that form the foundation for interacting with Solr servers. These operations handle client initialization, health monitoring, document management, and index maintenance.
Create and configure a Solr client instance with connection settings, authentication, timeouts, and custom handlers.
class Solr:
def __init__(self, url, decoder=None, encoder=None, timeout=60, results_cls=Results,
search_handler="select", use_qt_param=False, always_commit=False,
auth=None, verify=True, session=None):
"""
Initialize a Solr client.
Parameters:
- url (str): Solr server URL (e.g., 'http://localhost:8983/solr/core_name')
- decoder (json.JSONDecoder, optional): Custom JSON decoder instance
- encoder (json.JSONEncoder, optional): Custom JSON encoder instance
- timeout (int): Request timeout in seconds (default: 60)
- results_cls (type): Results class for search responses (default: Results)
- search_handler (str): Default search handler name (default: "select")
- use_qt_param (bool): Use qt parameter instead of handler path (default: False)
- always_commit (bool): Auto-commit all update operations (default: False)
- auth (tuple or requests auth object, optional): HTTP authentication
- verify (bool): Enable SSL certificate verification (default: True)
- session (requests.Session, optional): Custom requests session
"""Usage:
import pysolr
# Basic client
solr = pysolr.Solr('http://localhost:8983/solr/my_core')
# Client with timeout and authentication
solr = pysolr.Solr(
'https://solr.example.com/solr/my_core',
timeout=30,
auth=('username', 'password'),
always_commit=True
)
# Client with custom session and SSL settings
import requests
session = requests.Session()
session.headers.update({'User-Agent': 'MyApp/1.0'})
solr = pysolr.Solr(
'https://solr.example.com/solr/my_core',
session=session,
verify='/path/to/ca-bundle.crt'
)

Test connectivity and server health with ping operations.
def ping(self, handler="admin/ping", **kwargs):
"""
Send a ping request to test server connectivity.
Parameters:
- handler (str): Ping handler path (default: "admin/ping")
- **kwargs: Additional parameters passed to Solr
Returns:
str: Server response content
Raises:
SolrError: If ping fails or server is unreachable
"""Usage:
try:
response = solr.ping()
print("Solr server is healthy")
except pysolr.SolrError as e:
print(f"Solr server is down: {e}")Add or update documents in the Solr index with support for batch operations, field updates, and commit control.
def add(self, docs, boost=None, fieldUpdates=None, commit=None, softCommit=False,
commitWithin=None, waitFlush=None, waitSearcher=None, overwrite=None,
handler="update", min_rf=None):
"""
Add or update documents in the index.
Parameters:
- docs (list or dict): Document(s) to index. Each document is a dict with field names as keys
- boost (dict, optional): Per-field boost values {"field_name": boost_value}
- fieldUpdates (dict, optional): Field update operations {"field": "set"/"add"/"inc"}
- commit (bool, optional): Force commit after operation (overrides always_commit)
- softCommit (bool): Perform soft commit (default: False)
- commitWithin (int, optional): Auto-commit within specified milliseconds
- waitFlush (bool, optional): Wait for flush to complete
- waitSearcher (bool, optional): Wait for new searcher
- overwrite (bool, optional): Allow document overwrites (default: True)
- handler (str): Update handler path (default: "update")
- min_rf (int, optional): Minimum replication factor for SolrCloud
Returns:
str: Server response content
Raises:
SolrError: If indexing fails
ValueError: If docs parameter is invalid
"""Usage:
# Single document
solr.add({
"id": "doc_1",
"title": "Sample Document",
"content": "This is the document content.",
"category": "example"
})
# Multiple documents
docs = [
{"id": "doc_1", "title": "First Document", "content": "Content 1"},
{"id": "doc_2", "title": "Second Document", "content": "Content 2"}
]
solr.add(docs)
# With field boosts
solr.add(
{"id": "doc_1", "title": "Important Document", "content": "Key content"},
boost={"title": 2.0, "content": 1.5}
)
# Atomic field updates
solr.add(
{"id": "existing_doc", "category": "updated"},
fieldUpdates={"category": "set"}
)
# With commit control
solr.add(docs, commit=True) # Force immediate commit
solr.add(docs, commitWithin=5000)  # Auto-commit within 5 seconds

Remove documents from the index by ID or query with commit control options.
def delete(self, id=None, q=None, commit=None, softCommit=False,
waitFlush=None, waitSearcher=None, handler="update"):
"""
Delete documents from the index.
Parameters:
- id (str, list, or None): Document ID(s) to delete. Can be single ID or list of IDs
- q (str or None): Lucene query to select documents for deletion
- commit (bool, optional): Force commit after deletion (overrides always_commit)
- softCommit (bool): Perform soft commit (default: False)
- waitFlush (bool, optional): Wait for flush to complete
- waitSearcher (bool, optional): Wait for new searcher
- handler (str): Update handler path (default: "update")
Returns:
str: Server response content
Raises:
SolrError: If deletion fails
ValueError: If neither id nor q is specified, or both are specified
"""Usage:
# Delete by single ID
solr.delete(id='doc_1')
# Delete by multiple IDs
solr.delete(id=['doc_1', 'doc_2', 'doc_3'])
# Delete by query
solr.delete(q='category:obsolete')
solr.delete(q='*:*') # Delete all documents
# With commit control
solr.delete(id='doc_1', commit=True)

Force Solr to write pending changes to disk and make them searchable.
def commit(self, softCommit=False, waitFlush=None, waitSearcher=None,
expungeDeletes=None, handler="update"):
"""
Force Solr to commit pending changes to disk.
Parameters:
- softCommit (bool): Perform soft commit (visible but not durable) (default: False)
- waitFlush (bool, optional): Wait for flush to complete before returning
- waitSearcher (bool, optional): Wait for new searcher before returning
- expungeDeletes (bool, optional): Expunge deleted documents during commit
- handler (str): Update handler path (default: "update")
Returns:
str: Server response content
Raises:
SolrError: If commit fails
"""Usage:
# Standard commit
solr.commit()
# Soft commit (fast, visible immediately but not durable)
solr.commit(softCommit=True)
# Hard commit with deleted document cleanup
solr.commit(expungeDeletes=True)
# Synchronous commit (wait for completion)
solr.commit(waitFlush=True, waitSearcher=True)

Optimize the Solr index by reducing the number of segments, improving query performance.
def optimize(self, commit=True, waitFlush=None, waitSearcher=None,
maxSegments=None, handler="update"):
"""
Optimize the Solr index by merging segments.
Parameters:
- commit (bool): Commit after optimization (default: True)
- waitFlush (bool, optional): Wait for flush to complete
- waitSearcher (bool, optional): Wait for new searcher
- maxSegments (int, optional): Maximum number of segments to merge down to
- handler (str): Update handler path (default: "update")
Returns:
str: Server response content
Raises:
SolrError: If optimization fails
"""Usage:
# Basic optimization
solr.optimize()
# Optimize to specific segment count
solr.optimize(maxSegments=1)
# Asynchronous optimization
solr.optimize(waitFlush=False, waitSearcher=False)

Extract content and metadata from files using Apache Tika integration for rich document processing.
def extract(self, file_obj, extractOnly=True, handler="update/extract", **kwargs):
"""
Extract content and metadata from files using Apache Tika.
Parameters:
- file_obj (file-like object): File object with a 'name' attribute to extract from
- extractOnly (bool): If True, only extract without indexing (default: True)
- handler (str): Extract handler path (default: "update/extract")
- **kwargs: Additional parameters passed to Solr ExtractingRequestHandler
Returns:
dict: Dictionary containing extracted content and metadata:
- contents: Extracted full-text content (if applicable)
- metadata: Key-value pairs of extracted metadata
Raises:
ValueError: If file_obj doesn't have a 'name' attribute
SolrError: If extraction fails or server error occurs
"""Usage:
# Extract content from a PDF file
with open('document.pdf', 'rb') as pdf_file:
extracted = solr.extract(pdf_file)
print("Content:", extracted.get('contents', 'No content'))
print("Metadata:", extracted.get('metadata', {}))
# Extract and index in one step
with open('document.docx', 'rb') as doc_file:
result = solr.extract(
doc_file,
extractOnly=False, # Index the document
literal_id='doc_123', # Provide document ID
literal_title='Important Document' # Add custom fields
)

class SolrError(Exception):
"""Exception raised for Solr-related errors including network issues, timeouts, and server errors."""
pass

Install with Tessl CLI
npx tessl i tessl/pypi-pysolr