or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

admin-operations.md core-client.md document-processing.md index.md search-operations.md solrcloud-support.md utilities.md

docs/solrcloud-support.md

0

# SolrCloud Support

1

2

SolrCloud cluster support with ZooKeeper coordination, automatic failover, leader detection, and distributed query handling across multiple Solr nodes for high availability and scalability.

3

4

## Capabilities

5

6

### SolrCloud Client

7

8

SolrCloud-aware client that extends the standard Solr client with cluster coordination capabilities.

9

10

```python { .api }

11

class SolrCloud(Solr):

12

def __init__(self, zookeeper, collection, decoder=None, encoder=None, timeout=60,

13

retry_count=5, retry_timeout=0.2, auth=None, verify=True, *args, **kwargs):

14

"""

15

Initialize a SolrCloud client with ZooKeeper coordination.

16

17

Parameters:

18

- zookeeper (ZooKeeper): ZooKeeper client instance for cluster coordination

19

- collection (str): SolrCloud collection name to work with

20

- decoder (json.JSONDecoder, optional): Custom JSON decoder instance

21

- encoder (json.JSONEncoder, optional): Custom JSON encoder instance

22

- timeout (int): Request timeout in seconds (default: 60)

23

- retry_count (int): Number of retry attempts for failed requests (default: 5)

24

- retry_timeout (float): Delay between retry attempts in seconds (default: 0.2)

25

- auth (tuple or requests auth object, optional): HTTP authentication

26

- verify (bool): Enable SSL certificate verification (default: True)

27

- *args, **kwargs: Additional arguments passed to parent Solr class

28

29

Raises:

30

SolrError: If ZooKeeper connection or collection access fails

31

"""

32

```

33

34

Usage:

35

36

```python

37

import pysolr

38

39

# Create ZooKeeper client

40

zk = pysolr.ZooKeeper('localhost:2181')

41

42

# Create SolrCloud client

43

solr_cloud = pysolr.SolrCloud(

44

zookeeper=zk,

45

collection='my_collection',

46

timeout=30,

47

retry_count=3,

48

auth=('username', 'password')

49

)

50

51

# Use like regular Solr client with automatic failover

52

results = solr_cloud.search('*:*')

53

solr_cloud.add([{'id': 'doc1', 'title': 'Test Document'}])

54

```

55

56

### ZooKeeper Coordination

57

58

ZooKeeper client for managing cluster state, node discovery, and leader election in SolrCloud deployments.

59

60

```python { .api }

61

class ZooKeeper:

62

def __init__(self, zkServerAddress, timeout=15, max_retries=-1, kazoo_client=None):

63

"""

64

Initialize ZooKeeper client for SolrCloud coordination.

65

66

Parameters:

67

- zkServerAddress (str): ZooKeeper server address (e.g., 'localhost:2181' or 'zk1:2181,zk2:2181,zk3:2181')

68

- timeout (int): Connection timeout in seconds (default: 15)

69

- max_retries (int): Maximum retry attempts (-1 for unlimited) (default: -1)

70

- kazoo_client (KazooClient, optional): Custom Kazoo client instance

71

72

Raises:

73

RuntimeError: If kazoo library is not installed

74

"""

75

76

def getHosts(self, collname, only_leader=False, seen_aliases=None):

77

"""

78

Get list of active Solr hosts for a collection.

79

80

Parameters:

81

- collname (str): Collection name or alias

82

- only_leader (bool): Return only leader nodes (default: False)

83

- seen_aliases (list, optional): Track aliases to prevent circular references

84

85

Returns:

86

list: List of active Solr base URLs for the collection

87

88

Raises:

89

SolrError: If collection is unknown or no active hosts found

90

"""

91

92

def getRandomURL(self, collname, only_leader=False):

93

"""

94

Get a random active Solr URL for load balancing.

95

96

Parameters:

97

- collname (str): Collection name or alias

98

- only_leader (bool): Return only leader nodes (default: False)

99

100

Returns:

101

str: Complete Solr URL including collection path

102

103

Raises:

104

SolrError: If no active shards are available

105

"""

106

107

def getLeaderURL(self, collname):

108

"""

109

Get a leader node URL for update operations.

110

111

Parameters:

112

- collname (str): Collection name or alias

113

114

Returns:

115

str: Complete Solr URL for a leader node

116

117

Raises:

118

SolrError: If no leader nodes are available

119

"""

120

```

121

122

Usage:

123

124

```python

125

import pysolr

126

127

# Initialize ZooKeeper client

128

zk = pysolr.ZooKeeper('zk1:2181,zk2:2181,zk3:2181')

129

130

# Get all active hosts for a collection

131

hosts = zk.getHosts('my_collection')

132

print(f"Active hosts: {hosts}")

133

134

# Get only leader hosts (for updates)

135

leaders = zk.getHosts('my_collection', only_leader=True)

136

print(f"Leader hosts: {leaders}")

137

138

# Get random URL for load balancing

139

random_url = zk.getRandomURL('my_collection')

140

print(f"Random URL: {random_url}")

141

142

# Get leader URL for updates

143

leader_url = zk.getLeaderURL('my_collection')

144

print(f"Leader URL: {leader_url}")

145

```

146

147

## Complete SolrCloud Setup Example

148

149

```python

150

import pysolr

151

152

# Step 1: Initialize ZooKeeper client

153

print("Connecting to ZooKeeper...")

154

zk = pysolr.ZooKeeper(

155

zkServerAddress='zk1:2181,zk2:2181,zk3:2181',

156

timeout=30,

157

max_retries=5

158

)

159

160

# Step 2: Create SolrCloud client

161

print("Creating SolrCloud client...")

162

solr_cloud = pysolr.SolrCloud(

163

zookeeper=zk,

164

collection='my_distributed_collection',

165

timeout=60,

166

retry_count=3,

167

retry_timeout=1.0,

168

auth=('solr_user', 'solr_password'),

169

always_commit=True

170

)

171

172

try:

173

# Step 3: Test connectivity

174

print("Testing SolrCloud connectivity...")

175

response = solr_cloud.ping()

176

print("SolrCloud is healthy")

177

178

# Step 4: Index documents (automatically routed to leader)

179

print("Indexing documents...")

180

docs = [

181

{'id': 'doc1', 'title': 'First Document', 'content': 'Content for first document'},

182

{'id': 'doc2', 'title': 'Second Document', 'content': 'Content for second document'},

183

{'id': 'doc3', 'title': 'Third Document', 'content': 'Content for third document'}

184

]

185

solr_cloud.add(docs)

186

print(f"Indexed {len(docs)} documents")

187

188

# Step 5: Search across cluster (load balanced)

189

print("Searching documents...")

190

results = solr_cloud.search('*:*', rows=100)

191

print(f"Found {results.hits} total documents")

192

193

# Step 6: Search with distributed faceting

194

print("Searching with facets...")

195

results = solr_cloud.search(

196

'*:*',

197

facet=True,

198

facet_field='title',

199

facet_mincount=1

200

)

201

print(f"Facet results: {results.facets}")

202

203

# Step 7: Demonstrate failover by getting multiple URLs

204

print("Available cluster nodes:")

205

hosts = zk.getHosts('my_distributed_collection')

206

for i, host in enumerate(hosts):

207

print(f" Node {i+1}: {host}")

208

209

except pysolr.SolrError as e:

210

print(f"SolrCloud operation failed: {e}")

211

print("This may indicate cluster issues or network problems")

212

213

finally:

214

# Cleanup is handled automatically by the clients

215

print("SolrCloud operations completed")

216

```

217

218

## High Availability Patterns

219

220

### Automatic Failover

221

222

SolrCloud clients automatically handle node failures and retry operations:

223

224

```python

225

import pysolr

226

227

zk = pysolr.ZooKeeper('zk1:2181,zk2:2181,zk3:2181')

228

solr_cloud = pysolr.SolrCloud(

229

zookeeper=zk,

230

collection='ha_collection',

231

retry_count=5,

232

retry_timeout=2.0

233

)

234

235

try:

236

# This will automatically retry on different nodes if one fails

237

results = solr_cloud.search('important_query')

238

239

# Updates will automatically find and use leader nodes

240

solr_cloud.add({'id': 'critical_doc', 'data': 'important_data'})

241

242

except pysolr.SolrError as e:

243

print(f"All nodes failed after retries: {e}")

244

```

245

246

### Load Balanced Queries

247

248

Distribute read queries across available replicas:

249

250

```python

251

import random

252

import pysolr

253

254

zk = pysolr.ZooKeeper('zk1:2181,zk2:2181,zk3:2181')

255

256

# Get available hosts for manual load balancing

257

hosts = zk.getHosts('my_collection')

258

print(f"Load balancing across {len(hosts)} nodes")

259

260

# Create multiple clients for different purposes

261

read_clients = []

262

for host in hosts:

263

client = pysolr.Solr(f"{host}/my_collection", timeout=30)

264

read_clients.append(client)

265

266

# Use SolrCloud client for updates (handles leader detection)

267

update_client = pysolr.SolrCloud(zk, 'my_collection')

268

269

# Distribute read queries

270

for i in range(10):

271

client = random.choice(read_clients)

272

results = client.search(f'query_{i}')

273

print(f"Query {i} executed on {client.url}")

274

275

# All updates go through SolrCloud client

276

update_client.add({'id': f'doc_{i}', 'content': f'Document {i}'})

277

```

278

279

## Error Handling and Monitoring

280

281

```python

282

import pysolr

283

import time

284

285

zk = pysolr.ZooKeeper('zk1:2181,zk2:2181,zk3:2181')

286

solr_cloud = pysolr.SolrCloud(zk, 'monitored_collection')

287

288

def monitor_cluster_health():

289

"""Monitor SolrCloud cluster health."""

290

try:

291

# Check ZooKeeper connectivity

292

hosts = zk.getHosts('monitored_collection')

293

if not hosts:

294

print("WARNING: No active hosts found")

295

return False

296

297

# Check individual node health

298

healthy_nodes = 0

299

for host in hosts:

300

try:

301

client = pysolr.Solr(f"{host}/monitored_collection")

302

client.ping()

303

healthy_nodes += 1

304

except pysolr.SolrError:

305

print(f"WARNING: Node {host} is unhealthy")

306

307

print(f"Cluster health: {healthy_nodes}/{len(hosts)} nodes healthy")

308

return healthy_nodes > 0

309

310

except Exception as e:

311

print(f"Cluster monitoring failed: {e}")

312

return False

313

314

# Monitor cluster periodically

315

while True:

316

if monitor_cluster_health():

317

try:

318

# Perform operations when cluster is healthy

319

results = solr_cloud.search('*:*', rows=0) # Count query

320

print(f"Total documents in cluster: {results.hits}")

321

except pysolr.SolrError as e:

322

print(f"Cluster operation failed: {e}")

323

else:

324

print("Cluster is unhealthy, skipping operations")

325

326

time.sleep(60) # Check every minute

327

```

328

329

## Dependencies

330

331

SolrCloud functionality requires the kazoo library:

332

333

```bash

334

# Install with SolrCloud support

335

pip install pysolr[solrcloud]

336

337

# Or install kazoo separately

338

pip install kazoo>=2.5.0

339

```

340

341

```python

342

# Check for SolrCloud support

343

try:

344

import pysolr

345

zk = pysolr.ZooKeeper('localhost:2181')

346

print("SolrCloud support is available")

347

except RuntimeError:

348

print("SolrCloud support requires 'kazoo' library")

349

print("Install with: pip install pysolr[solrcloud]")

350

```