or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

docs: cache-messaging.md, cloud-services.md, compose.md, core-containers.md, database-containers.md, index.md, search-analytics.md, waiting-strategies.md, web-testing.md

docs/search-analytics.md

# Search and Analytics Containers

Specialized containers for search engines, analytics platforms, and data processing including Elasticsearch, OpenSearch, ClickHouse, and vector databases for full-text search, analytics, and AI/ML workloads.

## Capabilities

### Elasticsearch Container

Elasticsearch distributed search and analytics engine container with configurable cluster settings and security options.

```python { .api }
class ElasticSearchContainer:
    def __init__(
        self,
        image: str = "elasticsearch:8.8.0",
        port: int = 9200,
        **kwargs: Any
    ):
        """
        Initialize Elasticsearch container.

        Args:
            image: Elasticsearch Docker image
            port: HTTP port (default 9200)
            **kwargs: Additional container options
        """

    def get_url(self) -> str:
        """
        Get Elasticsearch HTTP URL.

        Returns:
            Elasticsearch HTTP URL string
        """
```

### OpenSearch Container

OpenSearch distributed search and analytics engine container with dashboard support and security configuration.

```python { .api }
class OpenSearchContainer:
    def __init__(
        self,
        image: str = "opensearchproject/opensearch:latest",
        port: int = 9200,
        **kwargs: Any
    ):
        """
        Initialize OpenSearch container.

        Args:
            image: OpenSearch Docker image
            port: HTTP port (default 9200)
            **kwargs: Additional container options
        """

    def get_url(self) -> str:
        """
        Get OpenSearch HTTP URL.

        Returns:
            OpenSearch HTTP URL string
        """
```

### Vector Database Containers

Modern vector databases for similarity search, embeddings, and AI/ML applications.

```python { .api }
class ChromaContainer:
    def __init__(
        self,
        image: str = "chromadb/chroma:latest",
        port: int = 8000,
        **kwargs: Any
    ):
        """
        Initialize Chroma vector database container.

        Args:
            image: Chroma Docker image
            port: HTTP port (default 8000)
            **kwargs: Additional container options
        """

    def get_url(self) -> str:
        """
        Get Chroma HTTP URL.

        Returns:
            Chroma HTTP URL string
        """

class WeaviateContainer:
    def __init__(
        self,
        image: str = "semitechnologies/weaviate:latest",
        port: int = 8080,
        **kwargs: Any
    ):
        """
        Initialize Weaviate vector database container.

        Args:
            image: Weaviate Docker image
            port: HTTP port (default 8080)
            **kwargs: Additional container options
        """

    def get_url(self) -> str:
        """
        Get Weaviate HTTP URL.

        Returns:
            Weaviate HTTP URL string
        """

class QdrantContainer:
    def __init__(
        self,
        image: str = "qdrant/qdrant:latest",
        port: int = 6333,
        **kwargs: Any
    ):
        """
        Initialize Qdrant vector database container.

        Args:
            image: Qdrant Docker image
            port: HTTP port (default 6333)
            **kwargs: Additional container options
        """

    def get_url(self) -> str:
        """
        Get Qdrant HTTP URL.

        Returns:
            Qdrant HTTP URL string
        """

class MilvusContainer:
    def __init__(
        self,
        image: str = "milvusdb/milvus:latest",
        port: int = 19530,
        **kwargs: Any
    ):
        """
        Initialize Milvus vector database container.

        Args:
            image: Milvus Docker image
            port: gRPC port (default 19530)
            **kwargs: Additional container options
        """

    def get_connection_args(self) -> dict:
        """
        Get Milvus connection arguments.

        Returns:
            Dictionary with host and port for Milvus client
        """
```

### Analytics Database Containers

High-performance analytics and columnar databases for OLAP workloads.

```python { .api }
class ClickHouseContainer:
    def __init__(
        self,
        image: str = "clickhouse/clickhouse-server:latest",
        port: int = 8123,
        username: str = "default",
        password: str = "",
        dbname: str = "default",
        **kwargs: Any
    ):
        """
        Initialize ClickHouse container.

        Args:
            image: ClickHouse Docker image
            port: HTTP port (default 8123)
            username: Database username
            password: Database password
            dbname: Database name
            **kwargs: Additional container options
        """

    def get_connection_url(self) -> str:
        """
        Get ClickHouse connection URL.

        Returns:
            ClickHouse connection URL string
        """

class TrinoContainer:
    def __init__(
        self,
        image: str = "trinodb/trino:latest",
        port: int = 8080,
        **kwargs: Any
    ):
        """
        Initialize Trino distributed query engine container.

        Args:
            image: Trino Docker image
            port: HTTP port (default 8080)
            **kwargs: Additional container options
        """

    def get_connection_url(self) -> str:
        """
        Get Trino connection URL.

        Returns:
            Trino connection URL string
        """
```

## Usage Examples

### Elasticsearch Full-Text Search

```python
from testcontainers.elasticsearch import ElasticSearchContainer
from elasticsearch import Elasticsearch

with ElasticSearchContainer("elasticsearch:8.8.0") as es_container:
    # Get Elasticsearch client
    es_url = es_container.get_url()
    es_client = Elasticsearch([es_url])

    # Wait for cluster to be ready
    es_client.cluster.health(wait_for_status="yellow", timeout="30s")

    # Create an index
    index_name = "test_index"
    es_client.indices.create(index=index_name, ignore=400)

    # Index some documents
    documents = [
        {"title": "Elasticsearch Guide", "content": "Learn about search and analytics"},
        {"title": "Python Testing", "content": "Unit testing with containers"},
        {"title": "Data Analytics", "content": "Big data processing and analysis"}
    ]

    for i, doc in enumerate(documents, 1):
        es_client.index(index=index_name, id=i, body=doc)

    # Refresh index
    es_client.indices.refresh(index=index_name)

    # Search documents
    search_query = {
        "query": {
            "match": {
                "content": "analytics"
            }
        }
    }

    results = es_client.search(index=index_name, body=search_query)
    print(f"Found {results['hits']['total']['value']} matching documents")

    for hit in results['hits']['hits']:
        print(f"- {hit['_source']['title']}: {hit['_score']}")
```

### Vector Database with Chroma

```python
from testcontainers.chroma import ChromaContainer
import chromadb
import numpy as np

with ChromaContainer() as chroma_container:
    # Get Chroma client
    chroma_url = chroma_container.get_url()
    client = chromadb.HttpClient(host=chroma_url.split("://")[1].split(":")[0],
                                 port=int(chroma_url.split(":")[2]))

    # Create collection
    collection = client.create_collection("test_collection")

    # Add embeddings
    embeddings = [
        [0.1, 0.2, 0.3, 0.4],
        [0.5, 0.6, 0.7, 0.8],
        [0.9, 0.1, 0.2, 0.3]
    ]

    documents = [
        "First document about AI",
        "Second document about machine learning",
        "Third document about data science"
    ]

    ids = ["doc1", "doc2", "doc3"]

    collection.add(
        embeddings=embeddings,
        documents=documents,
        ids=ids
    )

    # Query similar vectors
    query_embedding = [0.1, 0.25, 0.35, 0.45]
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=2
    )

    print("Similar documents:")
    for i, doc in enumerate(results['documents'][0]):
        distance = results['distances'][0][i]
        print(f"- {doc} (distance: {distance:.4f})")
```

### ClickHouse Analytics

```python
from testcontainers.clickhouse import ClickHouseContainer
import clickhouse_driver

with ClickHouseContainer() as clickhouse:
    # Connect to ClickHouse
    connection_url = clickhouse.get_connection_url()
    client = clickhouse_driver.Client.from_url(connection_url)

    # Create table for analytics
    client.execute("""
        CREATE TABLE IF NOT EXISTS events (
            timestamp DateTime,
            user_id UInt32,
            event_type String,
            value Float64
        ) ENGINE = MergeTree()
        ORDER BY timestamp
    """)

    # Insert sample data
    import datetime
    import random

    events_data = []
    base_time = datetime.datetime.now()

    for i in range(1000):
        events_data.append((
            base_time + datetime.timedelta(minutes=i),
            random.randint(1, 100),
            random.choice(['click', 'view', 'purchase']),
            random.uniform(1.0, 100.0)
        ))

    client.execute(
        "INSERT INTO events (timestamp, user_id, event_type, value) VALUES",
        events_data
    )

    # Run analytics queries
    # Daily event counts
    daily_stats = client.execute("""
        SELECT
            toDate(timestamp) as date,
            event_type,
            count() as events,
            sum(value) as total_value
        FROM events
        GROUP BY date, event_type
        ORDER BY date, event_type
    """)

    print("Daily event statistics:")
    for date, event_type, count, total in daily_stats:
        print(f"{date} {event_type}: {count} events, total value: {total:.2f}")

    # Top users by activity
    top_users = client.execute("""
        SELECT
            user_id,
            count() as activity_count,
            sum(value) as total_value
        FROM events
        GROUP BY user_id
        ORDER BY activity_count DESC
        LIMIT 5
    """)

    print("\nTop users by activity:")
    for user_id, count, total in top_users:
        print(f"User {user_id}: {count} events, total value: {total:.2f}")
```

### Multi-Engine Search Setup

```python
from testcontainers.elasticsearch import ElasticSearchContainer
from testcontainers.opensearch import OpenSearchContainer
from testcontainers.chroma import ChromaContainer
from testcontainers.core.network import Network

# Create network for search engines
with Network() as network:
    # Start multiple search engines
    with ElasticSearchContainer() as elasticsearch, \
         OpenSearchContainer() as opensearch, \
         ChromaContainer() as chroma:

        # Connect to network
        elasticsearch.with_network(network).with_network_aliases("elasticsearch")
        opensearch.with_network(network).with_network_aliases("opensearch")
        chroma.with_network(network).with_network_aliases("chroma")

        # Get service URLs
        es_url = elasticsearch.get_url()
        os_url = opensearch.get_url()
        chroma_url = chroma.get_url()

        print(f"Elasticsearch: {es_url}")
        print(f"OpenSearch: {os_url}")
        print(f"Chroma: {chroma_url}")

        # Use multiple search engines for different use cases
        # Elasticsearch for structured search
        # OpenSearch for log analytics
        # Chroma for vector similarity search
```

### Trino Distributed Query Engine

```python
from testcontainers.trino import TrinoContainer
import trino

with TrinoContainer() as trino_container:
    connection_url = trino_container.get_connection_url()

    # Connect to Trino
    conn = trino.dbapi.connect(
        host=connection_url.split("://")[1].split(":")[0],
        port=int(connection_url.split(":")[2]),
        user="test"
    )

    cursor = conn.cursor()

    # Query information schema
    cursor.execute("SHOW CATALOGS")
    catalogs = cursor.fetchall()
    print("Available catalogs:")
    for catalog in catalogs:
        print(f"- {catalog[0]}")

    # Create memory table for testing
    cursor.execute("""
        CREATE TABLE memory.default.sales AS
        SELECT * FROM (VALUES
            ('2023-01-01', 'Product A', 100.0),
            ('2023-01-02', 'Product B', 150.0),
            ('2023-01-03', 'Product A', 200.0)
        ) AS t(date, product, amount)
    """)

    # Query the data
    cursor.execute("""
        SELECT product, sum(amount) as total_sales
        FROM memory.default.sales
        GROUP BY product
        ORDER BY total_sales DESC
    """)

    results = cursor.fetchall()
    print("\nSales by product:")
    for product, total in results:
        print(f"{product}: ${total}")
```

### Vector Similarity Search Comparison

```python
from testcontainers.chroma import ChromaContainer
from testcontainers.weaviate import WeaviateContainer
from testcontainers.qdrant import QdrantContainer
import numpy as np

# Generate sample embeddings
def generate_embeddings(n_docs=100, dim=384):
    """Generate random embeddings for testing."""
    return np.random.random((n_docs, dim)).tolist()

embeddings = generate_embeddings()
documents = [f"Document {i}" for i in range(len(embeddings))]

# Test with multiple vector databases
with ChromaContainer() as chroma, \
     WeaviateContainer() as weaviate, \
     QdrantContainer() as qdrant:

    print("Testing vector similarity search across databases...")

    # Chroma setup
    import chromadb
    chroma_client = chromadb.HttpClient(host="localhost", port=8000)  # Simplified
    chroma_collection = chroma_client.create_collection("test")
    chroma_collection.add(
        embeddings=embeddings,
        documents=documents,
        ids=[str(i) for i in range(len(documents))]
    )

    # Query all databases with same vector
    query_vector = embeddings[0]  # Use first document as query

    # Chroma query
    chroma_results = chroma_collection.query(
        query_embeddings=[query_vector],
        n_results=5
    )

    print(f"Chroma found {len(chroma_results['documents'][0])} similar documents")

    # Compare performance and results
    print("Vector database comparison complete")
```