0
# SolrCloud Support
1
2
Support for SolrCloud clusters: ZooKeeper-based coordination, automatic failover, leader detection, and distributed query handling across multiple Solr nodes, providing high availability and scalability.
3
4
## Capabilities
5
6
### SolrCloud Client
7
8
SolrCloud-aware client that extends the standard Solr client with cluster coordination capabilities.
9
10
```python { .api }
11
class SolrCloud(Solr):
12
def __init__(self, zookeeper, collection, decoder=None, encoder=None, timeout=60,
13
retry_count=5, retry_timeout=0.2, auth=None, verify=True, *args, **kwargs):
14
"""
15
Initialize a SolrCloud client with ZooKeeper coordination.
16
17
Parameters:
18
- zookeeper (ZooKeeper): ZooKeeper client instance for cluster coordination
19
- collection (str): SolrCloud collection name to work with
20
- decoder (json.JSONDecoder, optional): Custom JSON decoder instance
21
- encoder (json.JSONEncoder, optional): Custom JSON encoder instance
22
- timeout (int): Request timeout in seconds (default: 60)
23
- retry_count (int): Number of retry attempts for failed requests (default: 5)
24
- retry_timeout (float): Delay between retry attempts in seconds (default: 0.2)
25
- auth (tuple or requests auth object, optional): HTTP authentication
26
- verify (bool): Enable SSL certificate verification (default: True)
27
- *args, **kwargs: Additional arguments passed to parent Solr class
28
29
Raises:
30
SolrError: If ZooKeeper connection or collection access fails
31
"""
32
```
33
34
Usage:
35
36
```python
37
import pysolr
38
39
# Create ZooKeeper client
40
zk = pysolr.ZooKeeper('localhost:2181')
41
42
# Create SolrCloud client
43
solr_cloud = pysolr.SolrCloud(
44
zookeeper=zk,
45
collection='my_collection',
46
timeout=30,
47
retry_count=3,
48
auth=('username', 'password')
49
)
50
51
# Use like regular Solr client with automatic failover
52
results = solr_cloud.search('*:*')
53
solr_cloud.add([{'id': 'doc1', 'title': 'Test Document'}])
54
```
55
56
### ZooKeeper Coordination
57
58
ZooKeeper client for managing cluster state, node discovery, and leader election in SolrCloud deployments.
59
60
```python { .api }
61
class ZooKeeper:
62
def __init__(self, zkServerAddress, timeout=15, max_retries=-1, kazoo_client=None):
63
"""
64
Initialize ZooKeeper client for SolrCloud coordination.
65
66
Parameters:
67
- zkServerAddress (str): ZooKeeper server address (e.g., 'localhost:2181' or 'zk1:2181,zk2:2181,zk3:2181')
68
- timeout (int): Connection timeout in seconds (default: 15)
69
- max_retries (int): Maximum retry attempts (-1 for unlimited) (default: -1)
70
- kazoo_client (KazooClient, optional): Custom Kazoo client instance
71
72
Raises:
73
RuntimeError: If kazoo library is not installed
74
"""
75
76
def getHosts(self, collname, only_leader=False, seen_aliases=None):
77
"""
78
Get list of active Solr hosts for a collection.
79
80
Parameters:
81
- collname (str): Collection name or alias
82
- only_leader (bool): Return only leader nodes (default: False)
83
- seen_aliases (list, optional): Aliases already visited while resolving collname; used to detect and stop circular alias references
84
85
Returns:
86
list: List of active Solr base URLs for the collection
87
88
Raises:
89
SolrError: If collection is unknown or no active hosts found
90
"""
91
92
def getRandomURL(self, collname, only_leader=False):
93
"""
94
Get a random active Solr URL for load balancing.
95
96
Parameters:
97
- collname (str): Collection name or alias
98
- only_leader (bool): Return only leader nodes (default: False)
99
100
Returns:
101
str: Complete Solr URL including collection path
102
103
Raises:
104
SolrError: If no active shards are available
105
"""
106
107
def getLeaderURL(self, collname):
108
"""
109
Get a leader node URL for update operations.
110
111
Parameters:
112
- collname (str): Collection name or alias
113
114
Returns:
115
str: Complete Solr URL for a leader node
116
117
Raises:
118
SolrError: If no leader nodes are available
119
"""
120
```
121
122
Usage:
123
124
```python
125
import pysolr
126
127
# Initialize ZooKeeper client
128
zk = pysolr.ZooKeeper('zk1:2181,zk2:2181,zk3:2181')
129
130
# Get all active hosts for a collection
131
hosts = zk.getHosts('my_collection')
132
print(f"Active hosts: {hosts}")
133
134
# Get only leader hosts (for updates)
135
leaders = zk.getHosts('my_collection', only_leader=True)
136
print(f"Leader hosts: {leaders}")
137
138
# Get random URL for load balancing
139
random_url = zk.getRandomURL('my_collection')
140
print(f"Random URL: {random_url}")
141
142
# Get leader URL for updates
143
leader_url = zk.getLeaderURL('my_collection')
144
print(f"Leader URL: {leader_url}")
145
```
146
147
## Complete SolrCloud Setup Example
148
149
```python
150
import pysolr
151
152
# Step 1: Initialize ZooKeeper client
153
print("Connecting to ZooKeeper...")
154
zk = pysolr.ZooKeeper(
155
zkServerAddress='zk1:2181,zk2:2181,zk3:2181',
156
timeout=30,
157
max_retries=5
158
)
159
160
# Step 2: Create SolrCloud client
161
print("Creating SolrCloud client...")
162
solr_cloud = pysolr.SolrCloud(
163
zookeeper=zk,
164
collection='my_distributed_collection',
165
timeout=60,
166
retry_count=3,
167
retry_timeout=1.0,
168
auth=('solr_user', 'solr_password'),
169
always_commit=True
170
)
171
172
try:
173
# Step 3: Test connectivity
174
print("Testing SolrCloud connectivity...")
175
response = solr_cloud.ping()
176
print("SolrCloud is healthy")
177
178
# Step 4: Index documents (automatically routed to leader)
179
print("Indexing documents...")
180
docs = [
181
{'id': 'doc1', 'title': 'First Document', 'content': 'Content for first document'},
182
{'id': 'doc2', 'title': 'Second Document', 'content': 'Content for second document'},
183
{'id': 'doc3', 'title': 'Third Document', 'content': 'Content for third document'}
184
]
185
solr_cloud.add(docs)
186
print(f"Indexed {len(docs)} documents")
187
188
# Step 5: Search across cluster (load balanced)
189
print("Searching documents...")
190
results = solr_cloud.search('*:*', rows=100)
191
print(f"Found {results.hits} total documents")
192
193
# Step 6: Search with distributed faceting
194
print("Searching with facets...")
195
results = solr_cloud.search(
196
'*:*',
197
facet=True,
198
**{'facet.field': 'title'},  # Solr parameter names contain dots, so pass them via dict unpacking
199
**{'facet.mincount': 1}  # dotted Solr parameter; cannot be written as a Python keyword
200
)
201
print(f"Facet results: {results.facets}")
202
203
# Step 7: List the active cluster nodes that requests can fail over to
204
print("Available cluster nodes:")
205
hosts = zk.getHosts('my_distributed_collection')
206
for i, host in enumerate(hosts):
207
print(f" Node {i+1}: {host}")
208
209
except pysolr.SolrError as e:
210
print(f"SolrCloud operation failed: {e}")
211
print("This may indicate cluster issues or network problems")
212
213
finally:
214
# Cleanup is handled automatically by the clients
215
print("SolrCloud operations completed")
216
```
217
218
## High Availability Patterns
219
220
### Automatic Failover
221
222
SolrCloud clients automatically handle node failures and retry operations:
223
224
```python
225
import pysolr
226
227
zk = pysolr.ZooKeeper('zk1:2181,zk2:2181,zk3:2181')
228
solr_cloud = pysolr.SolrCloud(
229
zookeeper=zk,
230
collection='ha_collection',
231
retry_count=5,
232
retry_timeout=2.0
233
)
234
235
try:
236
# This will automatically retry on different nodes if one fails
237
results = solr_cloud.search('important_query')
238
239
# Updates will automatically find and use leader nodes
240
solr_cloud.add({'id': 'critical_doc', 'data': 'important_data'})
241
242
except pysolr.SolrError as e:
243
print(f"All nodes failed after retries: {e}")
244
```
245
246
### Load Balanced Queries
247
248
Distribute read queries across available replicas:
249
250
```python
251
import random
252
import pysolr
253
254
zk = pysolr.ZooKeeper('zk1:2181,zk2:2181,zk3:2181')
255
256
# Get available hosts for manual load balancing
257
hosts = zk.getHosts('my_collection')
258
print(f"Load balancing across {len(hosts)} nodes")
259
260
# Create one read client per active host
261
read_clients = []
262
for host in hosts:
263
client = pysolr.Solr(f"{host}/my_collection", timeout=30)
264
read_clients.append(client)
265
266
# Use SolrCloud client for updates (handles leader detection)
267
update_client = pysolr.SolrCloud(zk, 'my_collection')
268
269
# Distribute read queries
270
for i in range(10):
271
client = random.choice(read_clients)
272
results = client.search(f'query_{i}')
273
print(f"Query {i} executed on {client.url}")
274
275
# All updates go through SolrCloud client
276
update_client.add({'id': f'doc_{i}', 'content': f'Document {i}'})
277
```
278
279
## Error Handling and Monitoring
280
281
```python
282
import pysolr
283
import time
284
285
zk = pysolr.ZooKeeper('zk1:2181,zk2:2181,zk3:2181')
286
solr_cloud = pysolr.SolrCloud(zk, 'monitored_collection')
287
288
def monitor_cluster_health():
289
"""Monitor SolrCloud cluster health."""
290
try:
291
# Check ZooKeeper connectivity
292
hosts = zk.getHosts('monitored_collection')
293
if not hosts:
294
print("WARNING: No active hosts found")
295
return False
296
297
# Check individual node health
298
healthy_nodes = 0
299
for host in hosts:
300
try:
301
client = pysolr.Solr(f"{host}/monitored_collection")
302
client.ping()
303
healthy_nodes += 1
304
except pysolr.SolrError:
305
print(f"WARNING: Node {host} is unhealthy")
306
307
print(f"Cluster health: {healthy_nodes}/{len(hosts)} nodes healthy")
308
return healthy_nodes > 0
309
310
except Exception as e:
311
print(f"Cluster monitoring failed: {e}")
312
return False
313
314
# Monitor cluster periodically
315
while True:
316
if monitor_cluster_health():
317
try:
318
# Perform operations when cluster is healthy
319
results = solr_cloud.search('*:*', rows=0) # Count query
320
print(f"Total documents in cluster: {results.hits}")
321
except pysolr.SolrError as e:
322
print(f"Cluster operation failed: {e}")
323
else:
324
print("Cluster is unhealthy, skipping operations")
325
326
time.sleep(60) # Check every minute
327
```
328
329
## Dependencies
330
331
SolrCloud functionality requires the kazoo library:
332
333
```bash
334
# Install with SolrCloud support
335
pip install "pysolr[solrcloud]"
336
337
# Or install kazoo separately
338
pip install "kazoo>=2.5.0"
339
```
340
341
```python
342
# Check for SolrCloud support
343
try:
344
import pysolr
345
zk = pysolr.ZooKeeper('localhost:2181')
346
print("SolrCloud support is available")
347
except RuntimeError:
348
print("SolrCloud support requires 'kazoo' library")
349
print("Install with: pip install pysolr[solrcloud]")
350
```