Python SDK for Feast - an open-source feature store for machine learning that manages features for both training and serving environments.
```bash
npx @tessl/cli install tessl/pypi-feast@0.53.0
```

Feast (Feature Store) is a comprehensive open-source feature store for machine learning that enables ML platform teams to consistently manage features for both training and serving environments. The system provides an offline store for processing historical data at scale, a low-latency online store for real-time predictions, and a battle-tested feature server for serving pre-computed features.
```bash
pip install feast
```

```python
import feast
from feast import FeatureStore
```

Common imports for feature definitions:

```python
from feast import (
Entity,
FeatureView,
BatchFeatureView,
OnDemandFeatureView,
StreamFeatureView,
FeatureService,
Feature,
Field,
FileSource,
ValueType,
RepoConfig,
Project
)
```

Data source imports:

```python
from feast import (
BigQuerySource,
RedshiftSource,
SnowflakeSource,
AthenaSource,
KafkaSource,
KinesisSource,
PushSource,
RequestSource
)
```

Vector store imports:

```python
from feast import FeastVectorStore
```

A complete quickstart, from feature definitions to training and serving retrieval:

```python
import pandas as pd
from feast import FeatureStore, Entity, FeatureView, Field, FileSource, ValueType
from feast.types import Float64, Int64, String
from datetime import timedelta
# Initialize feature store from repo directory
fs = FeatureStore(repo_path=".")
# Define an entity
customer = Entity(
name="customer",
value_type=ValueType.INT64,
description="Customer identifier"
)
# Define a data source
customer_source = FileSource(
path="data/customer_features.parquet",
timestamp_field="event_timestamp"
)
# Define a feature view
customer_fv = FeatureView(
name="customer_features",
entities=[customer],
ttl=timedelta(days=1),
schema=[
Field(name="age", dtype=ValueType.INT64),
Field(name="income", dtype=ValueType.DOUBLE),
Field(name="city", dtype=ValueType.STRING)
],
source=customer_source
)
# Apply definitions to registry
fs.apply([customer, customer_fv])
# Get historical features for training
entity_df = pd.DataFrame({
"customer": [1001, 1002, 1003],
"event_timestamp": [
pd.Timestamp("2023-01-01"),
pd.Timestamp("2023-01-02"),
pd.Timestamp("2023-01-03")
]
})
training_df = fs.get_historical_features(
entity_df=entity_df,
features=["customer_features:age", "customer_features:income"]
).to_df()
# Get online features for serving
online_features = fs.get_online_features(
features=["customer_features:age", "customer_features:income"],
entity_rows=[{"customer": 1001}]
)
```

Feast provides a comprehensive feature store architecture with several key components, described in the sections below.
This architecture enables teams to prevent data leakage through point-in-time correctness, decouple ML from data infrastructure, and ensure model portability across environments while supporting multiple data sources and deployment scenarios.
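
Point-in-time correctness is worth making concrete: during historical retrieval, each entity row is joined only against feature values whose timestamps fall at or before that row's event_timestamp (and within the view's TTL), never values recorded later. A sketch reusing the quickstart store (timestamps and values are illustrative):

```python
# Each row sees the latest feature values as of its own timestamp.
entity_df = pd.DataFrame({
    "customer": [1001, 1001],
    "event_timestamp": [
        pd.Timestamp("2023-01-01"),  # joined against values known on Jan 1
        pd.Timestamp("2023-06-01"),  # joined against values known on Jun 1
    ],
})
training_df = fs.get_historical_features(
    entity_df=entity_df,
    features=["customer_features:income"],
).to_df()
```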
Core feature store operations including initialization, configuration, and lifecycle management. The FeatureStore class serves as the primary interface for all feature operations.

```python
class FeatureStore:
    def __init__(self, repo_path: Optional[str] = None, config: Optional[RepoConfig] = None): ...
    def apply(self, objects: List[Union[Entity, FeatureView, FeatureService]]): ...
    def get_historical_features(self, entity_df: pd.DataFrame, features: List[str]) -> RetrievalJob: ...
    def get_online_features(self, features: List[str], entity_rows: List[Dict[str, Any]]) -> OnlineResponse: ...
    def materialize(self, start_date: datetime, end_date: datetime, feature_views: Optional[List[str]] = None): ...
```
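
A minimal lifecycle sketch with these methods, assuming a configured repo in the current directory (the one-day window is illustrative):

```python
from datetime import datetime, timedelta

from feast import FeatureStore

fs = FeatureStore(repo_path=".")

# Load the last day of feature values from the offline store into the
# online store so they can be served at low latency.
fs.materialize(
    start_date=datetime.utcnow() - timedelta(days=1),
    end_date=datetime.utcnow(),
)
```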
Entity definitions that establish the primary keys and identifiers around which features are organized. Entities define collections of related features and enable proper joining across different data sources.

```python
class Entity:
    def __init__(self, name: str, value_type: ValueType, join_key: Optional[str] = None, description: str = "", tags: Optional[Dict[str, str]] = None): ...
```
```python
class ValueType(Enum):
    UNKNOWN = 0
    BYTES = 1
    STRING = 2
    INT32 = 3
    INT64 = 4
    DOUBLE = 5
    FLOAT = 6
    BOOL = 7
```
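
As a brief example combining the two, following the Entity constructor sketched above (the join column name is illustrative):

```python
from feast import Entity, ValueType

# Entity whose join key differs from its display name; feature tables
# are joined on the "driver_id" column.
driver = Entity(
    name="driver",
    join_key="driver_id",
    value_type=ValueType.INT64,
    description="Driver identifier",
)
```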
Feature view types that define how features are computed, stored, and served. Different view types support various feature engineering patterns from batch processing to real-time transformations.

```python
class FeatureView:
    def __init__(self, name: str, entities: List[Union[Entity, str]], schema: List[Field], source: DataSource, ttl: Optional[timedelta] = None): ...

class BatchFeatureView:
    def __init__(self, name: str, entities: List[Union[Entity, str]], schema: List[Field], source: DataSource): ...

class OnDemandFeatureView:
    def __init__(self, name: str, sources: Dict[str, Union[FeatureView, FeatureService]], udf: PythonTransformation): ...
```
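
The stub above shows the raw class; recent Feast releases also expose an @on_demand_feature_view decorator for defining request-time transformations. A sketch under that assumption (driver_stats_fv is assumed to be an existing FeatureView providing a conv_rate feature):

```python
import pandas as pd

from feast import Field, RequestSource
from feast.on_demand_feature_view import on_demand_feature_view
from feast.types import Float64, Int64

# Request-time input supplied by the caller with each request
val_request = RequestSource(
    name="val_request",
    schema=[Field(name="val_to_add", dtype=Int64)],
)

# driver_stats_fv is a hypothetical, previously defined FeatureView.
@on_demand_feature_view(
    sources=[driver_stats_fv, val_request],
    schema=[Field(name="adjusted_rate", dtype=Float64)],
)
def adjusted_rate_view(inputs: pd.DataFrame) -> pd.DataFrame:
    out = pd.DataFrame()
    out["adjusted_rate"] = inputs["conv_rate"] + inputs["val_to_add"]
    return out
```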
Data source implementations for connecting to various storage systems and streaming platforms. Each connector provides optimized access patterns for different data infrastructure scenarios.

```python
class FileSource:
    def __init__(self, path: str, timestamp_field: Optional[str] = None, created_timestamp_column: Optional[str] = None): ...

class BigQuerySource:
    def __init__(self, table: str, timestamp_field: Optional[str] = None, created_timestamp_column: Optional[str] = None): ...

class KafkaSource:
    def __init__(self, kafka_bootstrap_servers: str, message_format: StreamFormat, topic: str): ...
```
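
A hedged sketch pairing a batch source with a stream source; the table, topic, and schema strings are illustrative, and the extra name and timestamp_field arguments follow the fuller constructors in Feast itself:

```python
from feast import BigQuerySource, KafkaSource
from feast.data_format import JsonFormat

# Batch source backed by a BigQuery table
driver_stats = BigQuerySource(
    table="my_project.my_dataset.driver_stats",
    timestamp_field="event_timestamp",
)

# Stream source reading JSON messages from a Kafka topic
driver_stream = KafkaSource(
    name="driver_stream",
    kafka_bootstrap_servers="localhost:9092",
    topic="driver_stats",
    timestamp_field="event_timestamp",
    message_format=JsonFormat(schema_json="driver_id integer, conv_rate double"),
)
```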
Command-line interface for managing feature store operations, deployments, and development workflows. The CLI provides essential tools for feature engineering teams.

```bash
feast init PROJECT_NAME # Initialize new project
feast apply # Apply feature definitions
feast materialize # Materialize features to online store
feast serve # Start feature server
```
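
A typical development loop built from these commands, with illustrative timestamps (feast materialize-incremental, not listed above, backfills everything since the last run):

```bash
feast init my_project                 # scaffold a new feature repo
cd my_project/feature_repo
feast apply                           # register definitions from the repo
feast materialize 2023-01-01T00:00:00 2023-01-02T00:00:00
feast materialize-incremental 2023-01-02T00:00:00
feast serve                           # serve features over HTTP
```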
Vector store functionality for RAG (Retrieval-Augmented Generation) applications and semantic search using feature store infrastructure.

```python
class FeastVectorStore:
    def __init__(self, repo_path: str, rag_view: FeatureView, features: List[str]): ...
    def query(self, query_vector: Optional[np.ndarray] = None, query_string: Optional[str] = None, top_k: int = 10) -> OnlineResponse: ...
```
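
A hedged usage sketch against this interface; docs_view, the feature name, and the embedding dimension are hypothetical stand-ins for objects defined in your repo:

```python
import numpy as np

store = FeastVectorStore(
    repo_path=".",
    rag_view=docs_view,  # hypothetical FeatureView with an embedding field
    features=["docs_view:embedding"],
)

# Retrieve the five nearest documents to a query embedding
response = store.query(query_vector=np.random.rand(384), top_k=5)
print(response.to_df())
```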
Supporting types used across the APIs above:

```python
@dataclass
class Field:
    name: str
    dtype: FeastType  # from feast.types, e.g. Int64, Float64, String
    description: str = ""
    tags: Optional[Dict[str, str]] = None

class RepoConfig:
    def __init__(self, registry: str, project: str, provider: str): ...

class OnlineResponse:
    def to_dict(self) -> Dict[str, List[Any]]: ...
    def to_df(self) -> pd.DataFrame: ...

class RetrievalJob:
    def to_df(self) -> pd.DataFrame: ...
    def to_arrow(self) -> pa.Table: ...

class Project:
    name: str
    description: str
    tags: Dict[str, str]

class Permission:
    name: str
    types: List[str]
    policy: str

class SavedDataset:
    name: str
    features: List[str]
    join_keys: List[str]
    storage: SavedDatasetStorage

class ValidationReference:
    name: str
    dataset: SavedDataset

class LoggingSource:
    def __init__(self, name: str, source_type: str): ...

class LoggingConfig:
    destination: str
    format: str
```
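
Both retrieval interfaces convert results into standard in-memory formats; a short sketch reusing the quickstart store and entity_df:

```python
# Historical retrieval returns a RetrievalJob
job = fs.get_historical_features(
    entity_df=entity_df,
    features=["customer_features:age"],
)
table = job.to_arrow()  # pyarrow.Table
df = job.to_df()        # pandas DataFrame

# Online retrieval returns an OnlineResponse
resp = fs.get_online_features(
    features=["customer_features:age"],
    entity_rows=[{"customer": 1001}],
)
print(resp.to_dict())
```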