tessl install tessl/pypi-kedro@1.1.0

Kedro helps you build production-ready data and analytics pipelines
Agent Success
Agent success rate when using this tile
98%
Improvement
Agent success rate improvement when using this tile compared to baseline
1.32x
Baseline
Agent success rate without this tile
74%
Get started with Kedro in 5 minutes with a minimal working example.
pip install kedro

from kedro.pipeline import node, pipeline
from kedro.io import DataCatalog, MemoryDataset
from kedro.runner import SequentialRunner
# Step 1: Define functions
def clean_data(raw_data):
    """Normalise raw string records: strip surrounding whitespace and lowercase each one."""
    cleaned = []
    for record in raw_data:
        cleaned.append(record.strip().lower())
    return cleaned
def count_items(cleaned_data):
    """Summarise a cleaned list as a mapping of its size plus the items themselves."""
    return dict(count=len(cleaned_data), items=cleaned_data)
# Step 2: Create nodes — node(func, inputs, outputs) maps catalog dataset
# names onto the function's arguments and return value.
clean_node = node(clean_data, "raw_data", "cleaned_data")
count_node = node(count_items, "cleaned_data", "results")

# Step 3: Build pipeline — Kedro infers execution order from the data
# dependencies between nodes ("cleaned_data" links the two), not list order.
my_pipeline = pipeline([clean_node, count_node])

# Step 4: Set up data catalog mapping each dataset name to a storage
# backend; MemoryDataset keeps everything in RAM, fine for this example.
catalog = DataCatalog({
    "raw_data": MemoryDataset([" Apple ", " Banana ", " Cherry "]),
    "cleaned_data": MemoryDataset(),
    "results": MemoryDataset()
})

# Step 5: Run pipeline one node at a time with the sequential runner
runner = SequentialRunner()
outputs = runner.run(my_pipeline, catalog)

# Step 6: Get results — run() returns dataset wrappers, so call .load()
# to get the underlying Python object (see the troubleshooting note below).
results = outputs["results"].load()
print(results)  # {'count': 3, 'items': ['apple', 'banana', 'cherry']}

→ Creating Pipelines Guide - Comprehensive pipeline patterns
→ Working with Data Guide - Data catalog and custom datasets
→ Parallel Execution Guide - Optimize performance
def analyze_data(cleaned_data):
    """Return simple statistics about a list of cleaned strings.

    Args:
        cleaned_data: iterable of strings.

    Returns:
        dict with key "max_length": the length of the longest string,
        or 0 when the input is empty (the original raised ValueError
        because max() was called on an empty sequence).
    """
    # default=0 makes the empty-input case well-defined instead of crashing
    return {"max_length": max((len(x) for x in cleaned_data), default=0)}
# Wrap analyze_data as a node: reads "cleaned_data", writes "analysis"
analyze_node = node(analyze_data, "cleaned_data", "analysis")
# Add the new node to the pipeline
# Rebuild the pipeline so the analysis step runs alongside the others
my_pipeline = pipeline([clean_node, count_node, analyze_node])


def filter_data(data, min_length):
    """Keep only the strings in *data* that are at least *min_length* characters long."""
    return [item for item in data if len(item) >= min_length]
# Nodes can take multiple inputs; the "params:" prefix injects a runtime
# parameter from the catalog as the second function argument.
filter_node = node(
    filter_data,
    inputs=["cleaned_data", "params:min_length"],
    outputs="filtered_data"
)
# Add the runtime parameter to the catalog under its "params:" name
catalog["params:min_length"] = MemoryDataset(5)

# Run only from "cleaned_data" onwards: from_inputs() slices the pipeline
# to the nodes downstream of the given dataset(s).
partial_pipeline = my_pipeline.from_inputs("cleaned_data")
runner.run(partial_pipeline, catalog)
pip install kedro

Check that:
Ensure you call .load() on output datasets:
# ✅ Correct
results = outputs["results"].load()
# ❌ Returns MemoryDataset object, not data
results = outputs["results"]

See also: