Python library for throwaway instances of anything that can run in a Docker container
Specialized containers for search engines, analytics platforms, and data processing including Elasticsearch, OpenSearch, ClickHouse, and vector databases for full-text search, analytics, and AI/ML workloads.
Elasticsearch distributed search and analytics engine container with configurable cluster settings and security options.
class ElasticSearchContainer:
    """Throwaway Elasticsearch container for integration tests.

    Runs an Elasticsearch server in Docker and exposes its HTTP endpoint
    for use with any Elasticsearch client.
    """

    def __init__(
        self,
        image: str = "elasticsearch:8.8.0",
        port: int = 9200,
        **kwargs: Any
    ):
        """
        Initialize Elasticsearch container.

        Args:
            image: Elasticsearch Docker image (the tag pins the server version)
            port: HTTP port (default 9200)
            **kwargs: Additional container options
        """

    def get_url(self) -> str:
        """
        Get Elasticsearch HTTP URL.

        Returns:
            Elasticsearch HTTP URL string
        """

OpenSearch distributed search and analytics engine container with dashboard support and security configuration.
class OpenSearchContainer:
    """Throwaway OpenSearch container for integration tests.

    Runs an OpenSearch server in Docker and exposes its HTTP endpoint
    for use with any OpenSearch client.
    """

    def __init__(
        self,
        image: str = "opensearchproject/opensearch:latest",
        port: int = 9200,
        **kwargs: Any
    ):
        """
        Initialize OpenSearch container.

        Args:
            image: OpenSearch Docker image
            port: HTTP port (default 9200)
            **kwargs: Additional container options
        """

    def get_url(self) -> str:
        """
        Get OpenSearch HTTP URL.

        Returns:
            OpenSearch HTTP URL string
        """

Modern vector databases for similarity search, embeddings, and AI/ML applications.
class ChromaContainer:
    """Throwaway Chroma vector database container for integration tests.

    Runs a Chroma server in Docker and exposes its HTTP endpoint for use
    with the chromadb HTTP client.
    """

    def __init__(
        self,
        image: str = "chromadb/chroma:latest",
        port: int = 8000,
        **kwargs: Any
    ):
        """
        Initialize Chroma vector database container.

        Args:
            image: Chroma Docker image
            port: HTTP port (default 8000)
            **kwargs: Additional container options
        """

    def get_url(self) -> str:
        """
        Get Chroma HTTP URL.

        Returns:
            Chroma HTTP URL string
        """
class WeaviateContainer:
    """Throwaway Weaviate vector database container for integration tests.

    Runs a Weaviate server in Docker and exposes its HTTP endpoint for use
    with a Weaviate client.
    """

    def __init__(
        self,
        image: str = "semitechnologies/weaviate:latest",
        port: int = 8080,
        **kwargs: Any
    ):
        """
        Initialize Weaviate vector database container.

        Args:
            image: Weaviate Docker image
            port: HTTP port (default 8080)
            **kwargs: Additional container options
        """

    def get_url(self) -> str:
        """
        Get Weaviate HTTP URL.

        Returns:
            Weaviate HTTP URL string
        """
class QdrantContainer:
    """Throwaway Qdrant vector database container for integration tests.

    Runs a Qdrant server in Docker and exposes its HTTP endpoint for use
    with a Qdrant client.
    """

    def __init__(
        self,
        image: str = "qdrant/qdrant:latest",
        port: int = 6333,
        **kwargs: Any
    ):
        """
        Initialize Qdrant vector database container.

        Args:
            image: Qdrant Docker image
            port: HTTP port (default 6333)
            **kwargs: Additional container options
        """

    def get_url(self) -> str:
        """
        Get Qdrant HTTP URL.

        Returns:
            Qdrant HTTP URL string
        """
class MilvusContainer:
    """Throwaway Milvus vector database container for integration tests.

    Runs a Milvus server in Docker; unlike the HTTP-based containers above,
    clients connect over gRPC using host/port connection arguments.
    """

    def __init__(
        self,
        image: str = "milvusdb/milvus:latest",
        port: int = 19530,
        **kwargs: Any
    ):
        """
        Initialize Milvus vector database container.

        Args:
            image: Milvus Docker image
            port: gRPC port (default 19530)
            **kwargs: Additional container options
        """

    def get_connection_args(self) -> dict:
        """
        Get Milvus connection arguments.

        Returns:
            Dictionary with host and port for Milvus client
        """

High-performance analytics and columnar databases for OLAP workloads.
class ClickHouseContainer:
    """Throwaway ClickHouse container for integration tests.

    Runs a ClickHouse server in Docker with configurable credentials and
    database name, and exposes a connection URL for ClickHouse clients.
    """

    def __init__(
        self,
        image: str = "clickhouse/clickhouse-server:latest",
        port: int = 8123,
        username: str = "default",
        password: str = "",
        dbname: str = "default",
        **kwargs: Any
    ):
        """
        Initialize ClickHouse container.

        Args:
            image: ClickHouse Docker image
            port: HTTP port (default 8123)
            username: Database username
            password: Database password (empty by default, matching the image default)
            dbname: Database name
            **kwargs: Additional container options
        """

    def get_connection_url(self) -> str:
        """
        Get ClickHouse connection URL.

        Returns:
            ClickHouse connection URL string
        """
class TrinoContainer:
    """Throwaway Trino distributed query engine container for integration tests.

    Runs a Trino coordinator in Docker and exposes a connection URL for
    DB-API style Trino clients.
    """

    def __init__(
        self,
        image: str = "trinodb/trino:latest",
        port: int = 8080,
        **kwargs: Any
    ):
        """
        Initialize Trino distributed query engine container.

        Args:
            image: Trino Docker image
            port: HTTP port (default 8080)
            **kwargs: Additional container options
        """

    def get_connection_url(self) -> str:
        """
        Get Trino connection URL.

        Returns:
            Trino connection URL string
        """

from testcontainers.elasticsearch import ElasticSearchContainer
from elasticsearch import Elasticsearch

# Example: full-text indexing and search against a throwaway Elasticsearch.
with ElasticSearchContainer("elasticsearch:8.8.0") as es_container:
    # Build a client from the container's HTTP endpoint.
    es_url = es_container.get_url()
    es_client = Elasticsearch([es_url])

    # Wait for the single-node cluster to be ready for writes.
    es_client.cluster.health(wait_for_status="yellow", timeout="30s")

    # Create an index. The 8.x client removed the per-call ignore= kwarg;
    # use .options(ignore_status=400) to tolerate "index already exists".
    index_name = "test_index"
    es_client.options(ignore_status=400).indices.create(index=index_name)

    # Index some documents (document= replaces the deprecated body=).
    documents = [
        {"title": "Elasticsearch Guide", "content": "Learn about search and analytics"},
        {"title": "Python Testing", "content": "Unit testing with containers"},
        {"title": "Data Analytics", "content": "Big data processing and analysis"}
    ]
    for i, doc in enumerate(documents, 1):
        es_client.index(index=index_name, id=i, document=doc)

    # Refresh so the new documents are visible to search immediately.
    es_client.indices.refresh(index=index_name)

    # Search documents (query= replaces the deprecated body= wrapper).
    results = es_client.search(
        index=index_name,
        query={"match": {"content": "analytics"}},
    )
    print(f"Found {results['hits']['total']['value']} matching documents")
    for hit in results['hits']['hits']:
        print(f"- {hit['_source']['title']}: {hit['_score']}")

from testcontainers.chroma import ChromaContainer
import chromadb
from urllib.parse import urlparse

# Example: vector similarity search against a throwaway Chroma instance.
with ChromaContainer() as chroma_container:
    # Parse the container URL with urlparse instead of fragile chained
    # str.split() calls (which break on URLs with paths or credentials).
    parsed = urlparse(chroma_container.get_url())
    client = chromadb.HttpClient(host=parsed.hostname, port=parsed.port)

    # Create a collection to hold the embeddings.
    collection = client.create_collection("test_collection")

    # Add a few 4-dimensional embeddings with their source documents.
    embeddings = [
        [0.1, 0.2, 0.3, 0.4],
        [0.5, 0.6, 0.7, 0.8],
        [0.9, 0.1, 0.2, 0.3]
    ]
    documents = [
        "First document about AI",
        "Second document about machine learning",
        "Third document about data science"
    ]
    ids = ["doc1", "doc2", "doc3"]
    collection.add(
        embeddings=embeddings,
        documents=documents,
        ids=ids
    )

    # Query for the two nearest neighbours of a probe vector.
    query_embedding = [0.1, 0.25, 0.35, 0.45]
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=2
    )
    print("Similar documents:")
    for i, doc in enumerate(results['documents'][0]):
        distance = results['distances'][0][i]
        print(f"- {doc} (distance: {distance:.4f})")

from testcontainers.clickhouse import ClickHouseContainer
import datetime
import random

import clickhouse_driver

# Example: OLAP-style analytics against a throwaway ClickHouse server.
with ClickHouseContainer() as clickhouse:
    # Build a native client straight from the container's connection URL.
    client = clickhouse_driver.Client.from_url(clickhouse.get_connection_url())

    # Analytics-friendly schema: MergeTree ordered by event time.
    client.execute("""
    CREATE TABLE IF NOT EXISTS events (
    timestamp DateTime,
    user_id UInt32,
    event_type String,
    value Float64
    ) ENGINE = MergeTree()
    ORDER BY timestamp
    """)

    # Synthesize one event per minute starting from "now".
    base_time = datetime.datetime.now()
    rows = [
        (
            base_time + datetime.timedelta(minutes=offset),
            random.randint(1, 100),
            random.choice(['click', 'view', 'purchase']),
            random.uniform(1.0, 100.0),
        )
        for offset in range(1000)
    ]
    client.execute(
        "INSERT INTO events (timestamp, user_id, event_type, value) VALUES",
        rows
    )

    # Aggregate: event counts and value totals per day and event type.
    daily_stats = client.execute("""
    SELECT
    toDate(timestamp) as date,
    event_type,
    count() as events,
    sum(value) as total_value
    FROM events
    GROUP BY date, event_type
    ORDER BY date, event_type
    """)
    print("Daily event statistics:")
    for date, event_type, count, total in daily_stats:
        print(f"{date} {event_type}: {count} events, total value: {total:.2f}")

    # Aggregate: the five users with the most recorded events.
    top_users = client.execute("""
    SELECT
    user_id,
    count() as activity_count,
    sum(value) as total_value
    FROM events
    GROUP BY user_id
    ORDER BY activity_count DESC
    LIMIT 5
    """)
    print("\nTop users by activity:")
    for user_id, count, total in top_users:
        print(f"User {user_id}: {count} events, total value: {total:.2f}")

from testcontainers.elasticsearch import ElasticSearchContainer
from testcontainers.opensearch import OpenSearchContainer
from testcontainers.chroma import ChromaContainer
from testcontainers.core.network import Network

# Example: several search engines sharing one Docker network.
with Network() as network:
    # Network membership must be configured BEFORE the containers start;
    # calling with_network() on a container that is already running (inside
    # its `with` block) has no effect on the running container.
    elasticsearch = ElasticSearchContainer().with_network(network).with_network_aliases("elasticsearch")
    opensearch = OpenSearchContainer().with_network(network).with_network_aliases("opensearch")
    chroma = ChromaContainer().with_network(network).with_network_aliases("chroma")

    # Now start all three, already attached to the shared network.
    with elasticsearch, opensearch, chroma:
        # Service URLs as reachable from the test process.
        es_url = elasticsearch.get_url()
        os_url = opensearch.get_url()
        chroma_url = chroma.get_url()
        print(f"Elasticsearch: {es_url}")
        print(f"OpenSearch: {os_url}")
        print(f"Chroma: {chroma_url}")
        # Use multiple search engines for different use cases:
        # - Elasticsearch for structured search
        # - OpenSearch for log analytics
        # - Chroma for vector similarity search

from testcontainers.trino import TrinoContainer
import trino
from urllib.parse import urlparse

# Example: federated SQL queries against a throwaway Trino coordinator.
with TrinoContainer() as trino_container:
    # Parse host/port with urlparse instead of fragile chained str.split()
    # calls (which break on URLs with paths or credentials).
    parsed = urlparse(trino_container.get_connection_url())
    conn = trino.dbapi.connect(
        host=parsed.hostname,
        port=parsed.port,
        user="test"
    )
    cursor = conn.cursor()

    # List the catalogs configured in this Trino installation.
    cursor.execute("SHOW CATALOGS")
    catalogs = cursor.fetchall()
    print("Available catalogs:")
    for catalog in catalogs:
        print(f"- {catalog[0]}")

    # Create an in-memory table seeded with literal rows for testing.
    cursor.execute("""
    CREATE TABLE memory.default.sales AS
    SELECT * FROM (VALUES
    ('2023-01-01', 'Product A', 100.0),
    ('2023-01-02', 'Product B', 150.0),
    ('2023-01-03', 'Product A', 200.0)
    ) AS t(date, product, amount)
    """)

    # Aggregate sales per product.
    cursor.execute("""
    SELECT product, sum(amount) as total_sales
    FROM memory.default.sales
    GROUP BY product
    ORDER BY total_sales DESC
    """)
    results = cursor.fetchall()
    print("\nSales by product:")
    for product, total in results:
        print(f"{product}: ${total}")

from testcontainers.chroma import ChromaContainer
from testcontainers.weaviate import WeaviateContainer
from testcontainers.qdrant import QdrantContainer
import numpy as np
# Generate sample embeddings
def generate_embeddings(n_docs=100, dim=384):
"""Generate random embeddings for testing."""
return np.random.random((n_docs, dim)).tolist()
embeddings = generate_embeddings()
documents = [f"Document {i}" for i in range(len(embeddings))]
# Test with multiple vector databases
with ChromaContainer() as chroma, \
WeaviateContainer() as weaviate, \
QdrantContainer() as qdrant:
print("Testing vector similarity search across databases...")
# Chroma setup
import chromadb
chroma_client = chromadb.HttpClient(host="localhost", port=8000) # Simplified
chroma_collection = chroma_client.create_collection("test")
chroma_collection.add(
embeddings=embeddings,
documents=documents,
ids=[str(i) for i in range(len(documents))]
)
# Query all databases with same vector
query_vector = embeddings[0] # Use first document as query
# Chroma query
chroma_results = chroma_collection.query(
query_embeddings=[query_vector],
n_results=5
)
print(f"Chroma found {len(chroma_results['documents'][0])} similar documents")
# Compare performance and results
print("Vector database comparison complete")Install with Tessl CLI
npx tessl i tessl/pypi-testcontainers