tessl install tessl/pypi-kedro@1.1.0

Kedro helps you build production-ready data and analytics pipelines
Agent Success
Agent success rate when using this tile
98%
Improvement
Agent success rate improvement when using this tile compared to baseline
1.32x
Baseline
Agent success rate without this tile
74%
Helper functions and constants for common operations from kedro.utils and kedro.io.core.
from kedro.utils import load_obj, is_kedro_project, find_kedro_project, HTTP_PROTOCOLS, CLOUD_PROTOCOLS, PROTOCOL_DELIMITER
from kedro.io.core import generate_timestamp, get_protocol_and_path, get_filepath_str, is_parameter

def load_obj(obj_path: str, default_obj_path: str = "") -> Any:
"""
Extract an object from a given module path.
Parameters:
- obj_path: Path to object (e.g., 'mymodule.MyClass')
- default_obj_path: Default module path if obj_path has no module
Returns:
The loaded object (class, function, or variable)
Raises:
AttributeError: When the object does not have the given named attribute
Example:
>>> load_obj('kedro.pipeline.Pipeline')
<class 'kedro.pipeline.pipeline.Pipeline'>
>>> load_obj('SequentialRunner', 'kedro.runner')
<class 'kedro.runner.sequential_runner.SequentialRunner'>
"""

def is_kedro_project(project_path: str | Path) -> bool:
"""
Check if a given path is the root of a Kedro project.
Parameters:
- project_path: Path to test
Returns:
True if path is a Kedro project root, False otherwise
Note:
Checks for pyproject.toml with [tool.kedro] section
"""
def find_kedro_project(current_dir: Path) -> Path | None:
"""
Find a Kedro project starting from current directory.
Searches current directory and all parent directories for a Kedro project.
Parameters:
- current_dir: Starting directory for search
Returns:
Path to Kedro project root, or None if not found
Example:
>>> from pathlib import Path
>>> find_kedro_project(Path.cwd())
PosixPath('/home/user/my-kedro-project')
"""

from kedro.io.core import generate_timestamp

def generate_timestamp() -> str:
"""
Generate a timestamp for versioning datasets.
Returns:
String representation of the current UTC timestamp in VERSION_FORMAT
(YYYY-MM-DDThh.mm.ss.sssZ format, e.g., '2024-01-15T10.30.45.123Z')
Note:
Used internally by versioned datasets. Can be used to create custom
version strings for manual versioning.
Example:
>>> from kedro.io.core import generate_timestamp
>>> generate_timestamp()
'2024-01-15T10.30.45.123Z'
"""

from kedro.io.core import get_protocol_and_path, get_filepath_str, is_parameter

def get_protocol_and_path(
filepath: str | os.PathLike,
version: Version | None = None
) -> tuple[str, str]:
"""
Parse a filepath into protocol and path components.
Parameters:
- filepath: Raw filepath (e.g., 'gcs://bucket/test.json' or 'data/test.csv')
- version: Optional Version instance for versioned datasets
Returns:
Tuple of (protocol, path) where protocol is empty string for local paths
Warning:
Versioning is not supported for HTTP protocols.
Example:
>>> get_protocol_and_path('gcs://bucket/data.csv')
('gcs', 'bucket/data.csv')
>>> get_protocol_and_path('data/local.csv')
('', 'data/local.csv')
>>> get_protocol_and_path('s3://bucket/data.csv', Version('v1', 'v2'))
('s3', 'bucket/data.csv/v2/data.csv')
"""
def get_filepath_str(raw_path: PurePath, protocol: str) -> str:
"""
Convert a path and protocol into a filepath string.
Returns full filepath with protocol prefix for HTTP(s) protocols,
otherwise returns just the path as a POSIX string.
Parameters:
- raw_path: Path without protocol
- protocol: Protocol string (e.g., 'http', 's3', '')
Returns:
Filepath string with protocol prefix if HTTP(s), otherwise just the path
Example:
>>> from pathlib import PurePath
>>> get_filepath_str(PurePath('data/file.csv'), 's3')
'data/file.csv'
>>> get_filepath_str(PurePath('example.com/file.csv'), 'https')
'https://example.com/file.csv'
"""
def is_parameter(dataset_name: str) -> bool:
"""
Check if a dataset name represents a parameter.
Parameters are special datasets that store configuration values
and are identified by the 'params:' prefix or the exact name 'parameters'.
Parameters:
- dataset_name: Name of the dataset to check
Returns:
True if dataset is a parameter, False otherwise
Example:
>>> is_parameter('params:learning_rate')
True
>>> is_parameter('parameters')
True
>>> is_parameter('raw_data')
False
"""

HTTP_PROTOCOLS: tuple[str, ...] = ("http", "https")
CLOUD_PROTOCOLS: tuple[str, ...] = (
"abfs", # Azure Blob File System
"abfss", # Azure Blob File System (secure)
"adl", # Azure Data Lake
"gcs", # Google Cloud Storage
"gdrive", # Google Drive
"gs", # Google Storage
"oci", # Oracle Cloud Infrastructure
"oss", # Alibaba Cloud OSS
"s3", # AWS S3
"s3a", # AWS S3 (Hadoop)
"s3n", # AWS S3 (legacy Hadoop)
)
PROTOCOL_DELIMITER: str = "://"

from pathlib import Path
from kedro.utils import find_kedro_project
def get_project_root() -> Path:
    """Return the root directory of the Kedro project enclosing the working directory.

    The lookup is delegated to ``find_kedro_project``, starting from
    ``Path.cwd()``.

    Returns:
        The path reported by ``find_kedro_project``.

    Raises:
        RuntimeError: If ``find_kedro_project`` returns ``None``, i.e. no
            Kedro project was found for the current working directory.
    """
    root = find_kedro_project(Path.cwd())
    if root is not None:
        return root
    # No project located: fail loudly instead of handing callers a None path.
    raise RuntimeError("Not inside a Kedro project")
# Usage
project_root = get_project_root()
conf_path = project_root / "conf"

See also: