Lightweight Python client for Apache Solr
npx @tessl/cli install tessl/pypi-pysolr@3.10.0
A lightweight Python client for Apache Solr that provides a simple interface for performing basic Solr operations including document selection, updating, deletion, index optimization, More Like This functionality, spelling correction, timeout handling, and SolrCloud awareness.
Installation: pip install pysolr
Dependencies: simplejson, kazoo (for SolrCloud)
import pysolr
For SolrCloud functionality:
import pysolr
# Requires: pip install pysolr[solrcloud]
import pysolr
# Create a Solr client instance
solr = pysolr.Solr('http://localhost:8983/solr/my_core', always_commit=True)
# Health check
solr.ping()
# Index documents
solr.add([
{
"id": "doc_1",
"title": "A test document",
"content": "This is a sample document for indexing."
},
{
"id": "doc_2",
"title": "Another document",
"content": "More content to be indexed."
}
])
# Search for documents
results = solr.search('test')
print(f"Found {len(results)} documents")
for doc in results:
print(f"ID: {doc['id']}, Title: {doc['title']}")
# Delete documents
solr.delete(id='doc_1')
# Commit changes
solr.commit()
PySOLR uses a straightforward client-server architecture:
The library abstracts Solr's HTTP API into Python-friendly interfaces while maintaining access to all Solr features through keyword arguments.
Essential Solr operations including client initialization, health checks, document indexing, searching, deletion, and index management. These operations form the foundation for interacting with Solr servers.
class Solr:
def __init__(self, url, decoder=None, encoder=None, timeout=60, results_cls=Results,
search_handler="select", use_qt_param=False, always_commit=False,
auth=None, verify=True, session=None): ...
def ping(self, handler="admin/ping", **kwargs): ...
def add(self, docs, boost=None, fieldUpdates=None, commit=None, softCommit=False,
commitWithin=None, waitFlush=None, waitSearcher=None, overwrite=None,
handler="update", min_rf=None): ...
def delete(self, id=None, q=None, commit=None, softCommit=False,
waitFlush=None, waitSearcher=None, handler="update"): ...
def commit(self, softCommit=False, waitFlush=None, waitSearcher=None,
expungeDeletes=None, handler="update"): ...
def optimize(self, commit=True, waitFlush=None, waitSearcher=None,
maxSegments=None, handler="update"): ...
def extract(self, file_obj, extractOnly=True, handler="update/extract", **kwargs): ...
Advanced search functionality including basic queries, More Like This queries, term suggestions, and result handling with pagination and cursor mark support.
def search(self, q, search_handler=None, **kwargs): ...
def more_like_this(self, q, mltfl, handler="mlt", **kwargs): ...
def suggest_terms(self, fields, prefix, handler="terms", **kwargs): ...
class Results:
def __init__(self, decoded, next_page_query=None): ...
docs: list
hits: int
highlighting: dict
facets: dict
spellcheck: dict
Core administration capabilities for managing Solr cores including creation, reloading, renaming, swapping, and status monitoring.
class SolrCoreAdmin:
def __init__(self, url, *args, **kwargs): ...
def status(self, core=None): ...
def create(self, name, instance_dir=None, config="solrconfig.xml", schema="schema.xml"): ...
def reload(self, core): ...
def rename(self, core, other): ...
def swap(self, core, other): ...
def unload(self, core): ...
SolrCloud cluster support with ZooKeeper coordination, automatic failover, leader detection, and distributed query handling across multiple Solr nodes.
class SolrCloud(Solr):
def __init__(self, zookeeper, collection, decoder=None, encoder=None, timeout=60,
retry_count=5, retry_timeout=0.2, auth=None, verify=True, *args, **kwargs): ...
class ZooKeeper:
def __init__(self, zkServerAddress, timeout=15, max_retries=-1, kazoo_client=None): ...
def getHosts(self, collname, only_leader=False, seen_aliases=None): ...
def getRandomURL(self, collname, only_leader=False): ...
def getLeaderURL(self, collname): ...
Advanced document handling including content extraction with Apache Tika, nested document support, field updates, and XML/JSON processing utilities.
def extract(self, file_obj, extractOnly=True, handler="update/extract", **kwargs): ...
Helper functions for data conversion, text processing, URL encoding, and XML sanitization used throughout the library.
def get_version(): ...
def force_unicode(value): ...
def force_bytes(value): ...
def unescape_html(text): ...
def safe_urlencode(params, doseq=0): ...
def clean_xml_string(s): ...
def sanitize(data): ...
def is_py3(): ...
class SolrError(Exception):
"""Base exception for Solr-related errors."""
pass
# Constants
NESTED_DOC_KEY = "_childDocuments_" # Key for nested documents in document structure
# Type aliases for complex parameters
AuthType = tuple # HTTP auth tuple (username, password) or requests auth object
SessionType = requests.Session # Custom requests session
DecoderType = json.JSONDecoder # Custom JSON decoder
EncoderType = json.JSONEncoder # Custom JSON encoder