Database for AI powered by a storage format optimized for deep-learning applications.
75
Evaluation — 75%
↑ 1.59× agent success when using this tile
Comprehensive row and column-based data access patterns with support for indexing, slicing, batch operations, and efficient data manipulation. Deep Lake provides both mutable and read-only access patterns optimized for ML workflows.
Dataset objects provide dictionary-like and array-like access to data with automatic type handling and optimization.
class Dataset:
    """Primary mutable dataset class."""

    def __getitem__(self, key: Union[int, slice, str]) -> Union[Row, RowRange, Column]:
        """
        Access dataset elements by index or name.

        Parameters:
        - key: Row index (int), row range (slice), or column name (str)

        Returns:
        - Row: Single row access (when key is int)
        - RowRange: Multiple row access (when key is slice)
        - Column: Column access (when key is str)
        """

    def __len__(self) -> int:
        """Get number of rows in dataset."""

    def append(self, data: Dict[str, Any]) -> None:
        """
        Append new row to dataset.

        Parameters:
        - data: Dictionary mapping column names to values
        """

    def extend(self, data: List[Dict[str, Any]]) -> None:
        """
        Append multiple rows to dataset.

        Parameters:
        - data: List of dictionaries mapping column names to values
        """

    def add_column(self, name: str, dtype: Type) -> None:
        """
        Add new column to dataset.

        Parameters:
        - name: Column name
        - dtype: Column data type
        """

    def remove_column(self, name: str) -> None:
        """
        Remove column from dataset.

        Parameters:
        - name: Column name to remove
        """

    def rename_column(self, old_name: str, new_name: str) -> None:
        """
        Rename existing column.

        Parameters:
        - old_name: Current column name
        - new_name: New column name
        """
class ReadOnlyDataset:
    """Read-only dataset access."""

    def __getitem__(self, key: Union[int, slice, str]) -> Union[RowView, RowRangeView, ColumnView]:
        """Access dataset elements (read-only)."""

    def __len__(self) -> int:
        """Get number of rows in dataset."""
class DatasetView:
    """Query result view of dataset."""

    def __getitem__(self, key: Union[int, slice, str]) -> Union[RowView, RowRangeView, ColumnView]:
        """Access query result elements."""

    def __len__(self) -> int:
        """Get number of rows in view."""

    def summary(self) -> str:
        """Get summary statistics of the dataset view."""


# Column objects provide typed access to homogeneous data with support for
# indexing, slicing, and batch operations.
class Column:
    """Mutable column access."""

    name: str           # Column name
    metadata: Metadata  # Mutable column metadata
    indexes: List[str]  # Names of indexes defined on this column

    def __getitem__(self, key: Union[int, slice, List[int]]) -> Any:
        """
        Get column values by index.

        Parameters:
        - key: Row index (int), slice, or list of indices

        Returns:
        - Any: Single value or list of values
        """

    def __setitem__(self, key: Union[int, slice, List[int]], value: Any) -> None:
        """
        Set column values by index.

        Parameters:
        - key: Row index (int), slice, or list of indices
        - value: Value(s) to set
        """

    def __len__(self) -> int:
        """Get number of elements in column."""

    def create_index(self, type: IndexType) -> None:
        """
        Create index on column for query optimization.

        Parameters:
        - type: Index type specification
        """

    def drop_index(self, name: str) -> None:
        """
        Drop existing index.

        Parameters:
        - name: Index name to drop
        """

    def get_async(self, index: int) -> Future[Any]:
        """
        Get column value asynchronously.

        Parameters:
        - index: Row index

        Returns:
        Future[Any]: Future resolving to column value
        """

    def set_async(self, index: int, value: Any) -> FutureVoid:
        """
        Set column value asynchronously.

        Parameters:
        - index: Row index
        - value: Value to set

        Returns:
        FutureVoid: Future completing when set operation is done
        """

    def get_bytes(self, index: int) -> bytes:
        """
        Get raw bytes representation of column value.

        Parameters:
        - index: Row index

        Returns:
        bytes: Raw bytes data
        """

    def get_bytes_async(self, index: int) -> Future[bytes]:
        """
        Get raw bytes representation asynchronously.

        Parameters:
        - index: Row index

        Returns:
        Future[bytes]: Future resolving to raw bytes data
        """
class ColumnView:
    """Read-only column access."""

    name: str                    # Column name
    metadata: ReadOnlyMetadata   # Read-only column metadata
    indexes: List[str]           # Names of indexes defined on this column

    def __getitem__(self, key: Union[int, slice, List[int]]) -> Any:
        """Get column values by index (read-only)."""

    def __len__(self) -> int:
        """Get number of elements in column."""

    def get_async(self, index: int) -> Future[Any]:
        """Get column value asynchronously."""

    def get_bytes(self, index: int) -> bytes:
        """Get raw bytes representation of column value."""

    def get_bytes_async(self, index: int) -> Future[bytes]:
        """Get raw bytes representation asynchronously."""

    def _links_info(self) -> Dict[str, Any]:
        """Get link information for linked columns."""
class ColumnDefinition:
    """Mutable column definition."""

    name: str    # Column name
    dtype: Type  # Column data type

    def drop(self) -> None:
        """Drop this column from dataset."""

    def rename(self, new_name: str) -> None:
        """Rename this column."""
class ColumnDefinitionView:
    """Read-only column definition."""

    name: str    # Column name
    dtype: Type  # Column data type


# Row objects provide dictionary-like access to individual records with
# type-aware value handling.
class Row:
    """Mutable single row access."""

    row_id: int  # Index of this row within the dataset

    def __getitem__(self, column_name: str) -> Any:
        """
        Get value from specific column.

        Parameters:
        - column_name: Column name

        Returns:
        Any: Column value for this row
        """

    def __setitem__(self, column_name: str, value: Any) -> None:
        """
        Set value in specific column.

        Parameters:
        - column_name: Column name
        - value: Value to set
        """

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert row to dictionary.

        Returns:
        Dict[str, Any]: Dictionary mapping column names to values
        """

    def get_async(self, column_name: str) -> Future[Any]:
        """Get column value asynchronously."""

    def set_async(self, column_name: str, value: Any) -> FutureVoid:
        """Set column value asynchronously."""

    def get_bytes(self, column_name: str) -> bytes:
        """Get raw bytes representation of column value."""

    def get_bytes_async(self, column_name: str) -> Future[bytes]:
        """Get raw bytes representation asynchronously."""
class RowView:
    """Read-only single row access."""

    row_id: int  # Index of this row within the dataset

    def __getitem__(self, column_name: str) -> Any:
        """Get value from specific column (read-only)."""

    def to_dict(self) -> Dict[str, Any]:
        """Convert row to dictionary."""

    def get_async(self, column_name: str) -> Future[Any]:
        """Get column value asynchronously."""

    def get_bytes(self, column_name: str) -> bytes:
        """Get raw bytes representation of column value."""

    def get_bytes_async(self, column_name: str) -> Future[bytes]:
        """Get raw bytes representation asynchronously."""
class RowRange:
    """Mutable multiple row access."""

    def __getitem__(self, column_name: str) -> List[Any]:
        """Get values from specific column across all rows in range."""

    def __setitem__(self, column_name: str, values: List[Any]) -> None:
        """Set values in specific column across all rows in range."""

    def __len__(self) -> int:
        """Get number of rows in range."""

    def __iter__(self) -> Iterator[Row]:
        """Iterate over rows in range."""

    def summary(self) -> str:
        """Get summary statistics of the row range."""
class RowRangeView:
    """Read-only multiple row access."""

    def __getitem__(self, column_name: str) -> List[Any]:
        """Get values from specific column across all rows in range."""

    def __len__(self) -> int:
        """Get number of rows in range."""

    def __iter__(self) -> Iterator[RowView]:
        """Iterate over rows in range."""

    def summary(self) -> str:
        """Get summary statistics of the row range."""


# Metadata objects provide key-value storage for dataset and column metadata
# with type preservation.
class Metadata:
    """Mutable metadata storage."""

    def __getitem__(self, key: str) -> Any:
        """Get metadata value by key."""

    def __setitem__(self, key: str, value: Any) -> None:
        """Set metadata value by key."""

    def __contains__(self, key: str) -> bool:
        """Check if metadata key exists."""

    def keys(self) -> List[str]:
        """Get all metadata keys."""
class ReadOnlyMetadata:
    """Read-only metadata storage."""

    def __getitem__(self, key: str) -> Any:
        """Get metadata value by key."""

    def __contains__(self, key: str) -> bool:
        """Check if metadata key exists."""

    def keys(self) -> List[str]:
        """Get all metadata keys."""


import deeplake
# Open dataset
dataset = deeplake.open("./my_dataset")

# Row access
row = dataset[0]  # First row
print(row["image_path"])  # Access column value
print(row.to_dict())  # Convert to dictionary

# Row range access
rows = dataset[0:10]  # First 10 rows
for row in rows:
    print(row["label"])

# Column access
images_column = dataset["images"]
print(len(images_column))  # Number of images
first_image = images_column[0]  # First image

# Column slicing
batch_images = images_column[0:32]  # First 32 images

# Add new column
dataset.add_column("scores", deeplake.types.Float32())

# Append single row
dataset.append({
    "images": "new_image.jpg",
    "labels": "dog",
    "scores": 0.95
})

# Append multiple rows
batch_data = [
    {"images": f"image_{i}.jpg", "labels": f"label_{i}", "scores": 0.8 + i * 0.01}
    for i in range(100)
]
dataset.extend(batch_data)

# Update specific values
dataset[0]["scores"] = 0.99  # Update single value
dataset["scores"][0:10] = [0.9] * 10  # Update range

# Column operations
scores = dataset["scores"]
scores[100] = 0.85  # Set specific score
high_scores = scores[scores > 0.9]  # Filter high scores

# Access data in batches
batch_size = 32
for i in range(0, len(dataset), batch_size):
    batch = dataset[i:i + batch_size]
    # Get batch data as lists
    images = batch["images"]
    labels = batch["labels"]
    # Process batch
    process_batch(images, labels)

# Column-wise batch operations
images_column = dataset["images"]
for i in range(0, len(images_column), batch_size):
    image_batch = images_column[i:i + batch_size]
    processed_batch = preprocess_images(image_batch)
    # Save processed results...

import asyncio
async def process_data_async(dataset):
    """Fetch the first ten values of the "images" column concurrently."""
    # Get multiple values concurrently
    tasks = [
        dataset["images"].get_async(i)
        for i in range(10)
    ]
    images = await asyncio.gather(*tasks)
    return images


# Set values asynchronously
async def update_scores_async(dataset, new_scores):
    """Write each score to the "scores" column concurrently."""
    tasks = [
        dataset["scores"].set_async(i, score)
        for i, score in enumerate(new_scores)
    ]
    await asyncio.gather(*tasks)

# Dataset metadata
dataset.metadata["version"] = "1.0"
dataset.metadata["description"] = "Training dataset for image classification"
print(dataset.metadata.keys())

# Column metadata
images_column = dataset["images"]
images_column.metadata["preprocessing"] = "normalized"
images_column.metadata["source"] = "camera_feed"

# Access metadata
if "version" in dataset.metadata:
    print(f"Dataset version: {dataset.metadata['version']}")

# Create index on text column for fast queries
text_column = dataset["descriptions"]
text_column.create_index(deeplake.types.TextIndex(deeplake.types.Inverted))
# Create embedding index for similarity search
embedding_column = dataset["embeddings"]
embedding_column.create_index(
deeplake.types.EmbeddingIndex(deeplake.types.Clustered)
)
# List all indexes on column
print(text_column.indexes)
# Drop index when no longer needed
text_column.drop_index("inverted_index")Install with Tessl CLI
npx tessl i tessl/pypi-deeplakedocs
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10