Database for AI powered by a storage format optimized for deep-learning applications.
Evaluation — 75%
↑ 1.59x agent success when using this tile
TQL (Tensor Query Language) provides SQL-like syntax optimized for tensor operations, enabling complex data filtering, aggregation, and transformation across datasets. The query system supports both immediate execution and prepared statements for parameterized queries.
Execute TQL queries with immediate results or asynchronous processing for large datasets.
def query(query: str, token: Optional[str] = None, creds: Optional[Dict[str, str]] = None) -> DatasetView:
    """
    Execute a TQL query and return the results synchronously.

    Parameters:
    - query: TQL query string
    - token: Activeloop authentication token
    - creds: Storage credentials dictionary

    Returns:
        DatasetView: Query result view
    """
def query_async(query: str, token: Optional[str] = None, creds: Optional[Dict[str, str]] = None) -> Future[DatasetView]:
    """
    Execute a TQL query asynchronously.

    Parameters:
    - query: TQL query string
    - token: Activeloop authentication token
    - creds: Storage credentials dictionary

    Returns:
        Future[DatasetView]: Future resolving to the query result view
    """

# Create prepared statements for efficient execution of parameterized queries with variable substitution.
def prepare_query(query: str, token: Optional[str] = None, creds: Optional[Dict[str, str]] = None) -> Executor:
    """
    Prepare a parameterized query for efficient repeated execution.

    Parameters:
    - query: TQL query string with parameter placeholders
    - token: Activeloop authentication token
    - creds: Storage credentials dictionary

    Returns:
        Executor: Prepared query executor
    """
class Executor:
    """Prepared query executor for parameterized queries."""

    def get_query_string(self) -> str:
        """
        Get the prepared query string.

        Returns:
            str: Original query string with parameter placeholders
        """

    def run_single(self, parameters: Dict[str, Any]) -> DatasetView:
        """
        Execute the prepared query with a single parameter set.

        Parameters:
        - parameters: Dictionary mapping parameter names to values

        Returns:
            DatasetView: Query result view
        """

    def run_single_async(self, parameters: Dict[str, Any]) -> Future[DatasetView]:
        """
        Execute the prepared query asynchronously with a single parameter set.

        Parameters:
        - parameters: Dictionary mapping parameter names to values

        Returns:
            Future[DatasetView]: Future resolving to the query result view
        """

    def run_batch(self, parameters: List[Dict[str, Any]]) -> List[DatasetView]:
        """
        Execute the prepared query once for each parameter set.

        Parameters:
        - parameters: List of parameter dictionaries

        Returns:
            List[DatasetView]: Query result views, one per parameter set
        """

    def run_batch_async(self, parameters: List[Dict[str, Any]]) -> Future[List[DatasetView]]:
        """
        Execute the prepared query asynchronously with multiple parameter sets.

        Parameters:
        - parameters: List of parameter dictionaries

        Returns:
            Future[List[DatasetView]]: Future resolving to the list of result views
        """

# Analyze and explain query execution plans for optimization and debugging.
def explain_query(query: str, token: Optional[str] = None, creds: Optional[Dict[str, str]] = None) -> ExplainQueryResult:
    """
    Explain a query's execution plan without running it to completion.

    Parameters:
    - query: TQL query string to analyze
    - token: Activeloop authentication token
    - creds: Storage credentials dictionary

    Returns:
        ExplainQueryResult: Query execution plan and statistics
    """
class ExplainQueryResult:
    """Query execution plan and analysis."""

    def __str__(self) -> str:
        """
        Get a human-readable explanation of the query plan.

        Returns:
            str: Formatted query execution plan
        """

    def to_dict(self) -> Dict[str, Any]:
        """
        Get the query plan as structured data.

        Returns:
            Dict[str, Any]: Dictionary containing execution plan details
        """

# DatasetView objects provide additional query and analysis capabilities on query results.
class DatasetView:
    """Query result view with additional query capabilities."""

    # Schema describing the columns exposed by this view.
    schema: SchemaView

    def query(self, query: str) -> DatasetView:
        """
        Execute a nested query on this view.

        Parameters:
        - query: TQL query string

        Returns:
            DatasetView: Nested query result view
        """

    def prepare_query(self, query: str) -> Executor:
        """
        Prepare a parameterized query scoped to this view.

        Parameters:
        - query: TQL query string with parameter placeholders

        Returns:
            Executor: Prepared query executor
        """

    def explain_query(self, query: str) -> ExplainQueryResult:
        """
        Explain the execution plan of a query run against this view.

        Parameters:
        - query: TQL query string to analyze

        Returns:
            ExplainQueryResult: Query execution plan and statistics
        """

    def summary(self) -> str:
        """
        Get summary statistics of the dataset view.

        Returns:
            str: Summary statistics including row count, column info, etc.
        """

    def batches(self, batch_size: int = 1) -> Iterator[Dict[str, Any]]:
        """
        Iterate over the view's data in batches.

        Parameters:
        - batch_size: Number of rows per batch

        Returns:
            Iterator[Dict[str, Any]]: Iterator yielding batches as dictionaries
        """

# Register custom Python functions for use in TQL queries with automatic type inference.
def register_function(function: Callable) -> None:
    """
    Register a Python function for use in TQL queries.

    Parameters:
    - function: Python function to register; it becomes callable by name
      from within TQL query strings
    """
def get_max_num_parallel_queries() -> int:
    """
    Get the maximum number of parallel queries allowed.

    Returns:
        int: Maximum parallel query limit
    """
def set_max_num_parallel_queries(num: int) -> None:
    """
    Set the maximum number of parallel queries allowed.

    Parameters:
    - num: Maximum parallel query limit
    """

import deeplake
# Simple SELECT query
results = deeplake.query('SELECT * FROM "s3://my-bucket/dataset" WHERE label == "cat"')

# Access query results; a DatasetView supports len() and row iteration.
print(f"Found {len(results)} cat images")
for row in results:
    print(f"Image: {row['image_path']}, Label: {row['label']}")

# Query with aggregation
stats = deeplake.query('SELECT label, COUNT(*) as count FROM "s3://my-bucket/dataset" GROUP BY label')
for row in stats:
    print(f"Label: {row['label']}, Count: {row['count']}")

# Query with filtering and ordering
high_confidence = deeplake.query('''
SELECT image_path, confidence
FROM "s3://my-bucket/dataset"
WHERE confidence > 0.9
ORDER BY confidence DESC
LIMIT 10
''')

# Prepare parameterized query
executor = deeplake.prepare_query('''
SELECT * FROM "s3://my-bucket/dataset"
WHERE label == $label AND confidence > $min_confidence
''')

# Execute with different parameters; $-placeholders are bound per call.
cats = executor.run_single({"label": "cat", "min_confidence": 0.8})
dogs = executor.run_single({"label": "dog", "min_confidence": 0.8})

# Batch execution: one result view per parameter dictionary, in order.
params_list = [
    {"label": "cat", "min_confidence": 0.9},
    {"label": "dog", "min_confidence": 0.9},
    {"label": "bird", "min_confidence": 0.9}
]
results_list = executor.run_batch(params_list)
for i, results in enumerate(results_list):
    label = params_list[i]["label"]
    print(f"High confidence {label} images: {len(results)}")

# Complex filtering with multiple conditions
complex_query = deeplake.query('''
SELECT image_path, embeddings, metadata
FROM "s3://my-bucket/dataset"
WHERE label IN ("cat", "dog")
AND confidence > 0.85
AND width > 224
AND height > 224
''')

# Similarity search using embedding vectors.
# NOTE(review): `parameters=` is not part of the query() signature documented
# above, and `target_vector` must be defined beforehand — confirm against the
# installed deeplake API.
similar_images = deeplake.query('''
SELECT image_path,
COSINE_SIMILARITY(embeddings, $target_embedding) as similarity
FROM "s3://my-bucket/dataset"
WHERE COSINE_SIMILARITY(embeddings, $target_embedding) > 0.8
ORDER BY similarity DESC
''', parameters={"target_embedding": target_vector})

# Text search in descriptions
text_results = deeplake.query('''
SELECT * FROM "s3://my-bucket/dataset"
WHERE CONTAINS(description, "outdoor scene")
''')

# Geospatial queries
location_results = deeplake.query('''
SELECT * FROM "s3://my-bucket/dataset"
WHERE latitude BETWEEN 40.0 AND 41.0
AND longitude BETWEEN -74.0 AND -73.0
''')

# Analyze query performance
query_str = 'SELECT * FROM "s3://my-bucket/dataset" WHERE confidence > 0.9'
explanation = deeplake.explain_query(query_str)
print("Query Plan:")
print(explanation)

# Get structured execution plan as a dictionary.
# NOTE(review): the 'estimated_rows'/'uses_index' keys are assumed from this
# example, not guaranteed by the to_dict() contract above — verify.
plan_dict = explanation.to_dict()
print(f"Estimated rows: {plan_dict.get('estimated_rows', 'unknown')}")
print(f"Index usage: {plan_dict.get('uses_index', 'unknown')}")

# Query optimization suggestions
if not plan_dict.get('uses_index', False):
    print("Consider creating an index on 'confidence' column for better performance")

# Create initial view
base_view = deeplake.query('SELECT * FROM "s3://my-bucket/dataset" WHERE split == "train"')

# Query on the view; nested queries omit the FROM clause and operate on the view itself.
filtered_view = base_view.query('SELECT * WHERE confidence > 0.9')

# Further nested query
final_results = filtered_view.query('SELECT image_path, label ORDER BY confidence DESC LIMIT 100')
print(f"Top 100 high-confidence training images: {len(final_results)}")

import numpy as np
# Register custom function for TQL
def normalize_scores(scores):
    """Normalize confidence scores to the 0-1 range via min-max scaling.

    Parameters:
    - scores: sequence of numeric confidence values (non-empty)

    Returns:
        numpy.ndarray: scores rescaled so the minimum maps to 0.0 and the
        maximum to 1.0. A constant (or single-element) input yields an
        all-zero array rather than dividing by zero.
    """
    scores_array = np.asarray(scores, dtype=float)
    lo = scores_array.min()
    span = scores_array.max() - lo
    if span == 0:
        # Original code divided by zero here (NaN/inf) when all scores were equal.
        return np.zeros_like(scores_array)
    return (scores_array - lo) / span
deeplake.tql.register_function(normalize_scores)

# Use custom function in query; the registered Python function is called by
# name from TQL.
normalized_results = deeplake.query('''
SELECT image_path,
normalize_scores(confidence) as normalized_confidence
FROM "s3://my-bucket/dataset"
ORDER BY normalized_confidence DESC
''')

import asyncio
async def process_multiple_queries():
    """Run several TQL label queries concurrently and report result counts.

    Returns:
        list: one query result view per query, in the original query order.
    """
    queries = [
        'SELECT * FROM "s3://my-bucket/dataset" WHERE label == "cat"',
        'SELECT * FROM "s3://my-bucket/dataset" WHERE label == "dog"',
        'SELECT * FROM "s3://my-bucket/dataset" WHERE label == "bird"'
    ]
    # Kick off all queries at once and wait for every result.
    # NOTE(review): assumes the Future returned by query_async is awaitable
    # by asyncio.gather — confirm against the deeplake API.
    tasks = [deeplake.query_async(query) for query in queries]
    results = await asyncio.gather(*tasks)
    for i, result in enumerate(results):
        # Extract the label literal (second quoted string in the query).
        # The original computed this but never used it in the output.
        query_type = queries[i].split('"')[3]
        print(f"Query {i+1} ({query_type}) returned {len(result)} results")
    return results
# Run async queries
results = asyncio.run(process_multiple_queries())

# Set maximum parallel queries for performance tuning
current_max = deeplake.tql.get_max_num_parallel_queries()
print(f"Current max parallel queries: {current_max}")

# Increase for high-performance systems
deeplake.tql.set_max_num_parallel_queries(8)

# Query with wall-clock performance monitoring
import time
start_time = time.time()
large_results = deeplake.query('''
SELECT * FROM "s3://my-bucket/large_dataset"
WHERE embedding_magnitude > 0.5
''')
end_time = time.time()
print(f"Query executed in {end_time - start_time:.2f} seconds")
print(f"Returned {len(large_results)} results")

Install with Tessl CLI
npx tessl i tessl/pypi-deeplakedocs
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10