Skip to content

Commit bff5627

Browse files
feat: Enable static artifacts for feature server that can be used in Feature Transformations
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
1 parent b601ef2 commit bff5627

File tree

5 files changed

+565
-87
lines changed

5 files changed

+565
-87
lines changed

sdk/python/feast/feature_server.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,50 @@ async def _get_features(
155155
return features
156156

157157

async def load_static_artifacts(app: FastAPI, store):
    """
    Load static artifacts (models, lookup tables, etc.) into ``app.state``.

    Looks for an optional ``static_artifacts.py`` module in the feature
    repository and, if present, calls its ``load_artifacts(app)`` hook
    (sync or async). This lets templates and users pre-load resources once
    at server startup instead of on every request. Suitable artifacts:

    - Small ML models (scikit-learn, small neural networks)
    - Lookup tables and reference data
    - Configuration parameters
    - Pre-computed embeddings

    Note: Not recommended for large language models - use dedicated
    model serving solutions (vLLM, TGI, etc.) for those.

    Args:
        app: The FastAPI application; artifacts are stored on ``app.state``.
        store: The feature store; ``store.repo_path`` locates the repo.

    Any failure is logged and swallowed: artifact loading is best-effort
    and must never prevent the feature server from starting.
    """
    try:
        # Import here to avoid loading heavy dependencies unless needed
        import importlib.util
        import inspect
        from pathlib import Path

        # Look for static artifacts loading in the feature repository.
        # This allows templates and users to define their own artifact loading.
        repo_path = Path(store.repo_path) if store.repo_path else Path.cwd()
        artifacts_file = repo_path / "static_artifacts.py"

        # Guard clauses keep the happy path flat: nothing to do when the
        # repo ships no static_artifacts.py or the spec cannot be built.
        if not artifacts_file.exists():
            return

        # NOTE(review): this executes arbitrary code from the repo path by
        # design — the feature repository is assumed to be trusted.
        spec = importlib.util.spec_from_file_location(
            "static_artifacts", artifacts_file
        )
        if spec is None or spec.loader is None:
            return

        artifacts_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(artifacts_module)

        # Single lookup instead of hasattr() followed by a second attribute
        # access; the hook is optional.
        load_func = getattr(artifacts_module, "load_artifacts", None)
        if load_func is not None:
            # Support both async and sync user-defined hooks.
            if inspect.iscoroutinefunction(load_func):
                await load_func(app)
            else:
                load_func(app)
            # Log success only after the hook actually ran.
            logger.info("Loaded static artifacts from static_artifacts.py")
    except Exception as e:
        # Non-fatal error - feature server should still start.
        # Lazy %-args avoid formatting when the warning level is disabled.
        logger.warning("Failed to load static artifacts: %s", e)
158202
def get_app(
159203
store: "feast.FeatureStore",
160204
registry_ttl_sec: int = DEFAULT_FEATURE_SERVER_REGISTRY_TTL,
@@ -215,8 +259,12 @@ def async_refresh():
215259
active_timer = threading.Timer(registry_ttl_sec, async_refresh)
216260
active_timer.start()
217261

262+
218263
@asynccontextmanager
219264
async def lifespan(app: FastAPI):
265+
# Load static artifacts before initializing store
266+
await load_static_artifacts(app, store)
267+
220268
await store.initialize()
221269
async_refresh()
222270
yield

sdk/python/feast/templates/pytorch_nlp/README.md

Lines changed: 116 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,8 @@ my-sentiment-project/
157157
├── README.md # This file
158158
└── feature_repo/
159159
├── feature_store.yaml # Feast configuration
160-
├── example_repo.py # Feature definitions
160+
├── example_repo.py # Feature definitions (uses pre-loaded artifacts)
161+
├── static_artifacts.py # Static artifacts loading (models, lookup tables)
161162
├── test_workflow.py # Complete demo workflow
162163
└── data/ # Generated sample data
163164
└── sentiment_data.parquet
@@ -205,6 +206,109 @@ offline_store:
205206
- ✅ **No external services** - No Redis/cloud required
206207
- ✅ **Perfect for demos** - Easy to share and understand
207208

209+
## 🚀 Static Artifacts Loading
210+
211+
This template demonstrates **static artifacts loading** - a performance optimization that loads models, lookup tables, and other artifacts once at feature server startup instead of on each request.
212+
213+
### What are Static Artifacts?
214+
215+
Static artifacts are pre-loaded resources that remain constant during server operation:
216+
- **Small ML models** (sentiment analysis, classification, small neural networks)
217+
- **Lookup tables and mappings** (label encoders, category mappings)
218+
- **Configuration data** (model parameters, feature mappings)
219+
- **Pre-computed embeddings** (user embeddings, item features)
220+
221+
### Performance Benefits
222+
223+
**Before (Per-Request Loading):**
224+
```python
225+
def sentiment_prediction(inputs):
226+
# ❌ Model loads on every request - slow!
227+
model = pipeline("sentiment-analysis", model="...")
228+
return model(inputs["text"])
229+
```
230+
231+
**After (Startup Loading):**
232+
```python
233+
# ✅ Model loads once at server startup
234+
def sentiment_prediction(inputs):
235+
global _sentiment_model # Pre-loaded model
236+
return _sentiment_model(inputs["text"])
237+
```
238+
239+
**Performance Impact:**
240+
- 🚀 **10-100x faster** inference (no model loading overhead)
241+
- 💾 **Lower memory usage** (shared model across requests)
242+
- ⚡ **Better scalability** (consistent response times)
243+
244+
### How It Works
245+
246+
1. **Startup**: Feast server loads `static_artifacts.py` during initialization
247+
2. **Loading**: `load_artifacts(app)` function stores models in `app.state`
248+
3. **Access**: On-demand feature views access pre-loaded artifacts via global references
249+
250+
```python
251+
# static_artifacts.py - Define what to load
252+
def load_artifacts(app: FastAPI):
253+
app.state.sentiment_model = load_sentiment_model()
254+
app.state.lookup_tables = load_lookup_tables()
255+
256+
# Update global references for easy access
257+
import example_repo
258+
example_repo._sentiment_model = app.state.sentiment_model
259+
example_repo._lookup_tables = app.state.lookup_tables
260+
261+
# example_repo.py - Use pre-loaded artifacts
262+
_sentiment_model = None # Set by static_artifacts.py
263+
264+
def sentiment_prediction(inputs):
265+
global _sentiment_model
266+
if _sentiment_model is not None:
267+
return _sentiment_model(inputs["text"])
268+
else:
269+
return fallback_predictions()
270+
```
271+
272+
### Scope and Limitations
273+
274+
**✅ Great for:**
275+
- Small to medium models (< 1GB)
276+
- Fast-loading models (sentiment analysis, classification)
277+
- Lookup tables and reference data
278+
- Configuration parameters
279+
- Pre-computed embeddings
280+
281+
**❌ Not recommended for:**
282+
- **Large Language Models (LLMs)** - Use dedicated serving solutions like vLLM, TGI, or TensorRT-LLM
283+
- Models requiring GPU clusters
284+
- Frequently updated models
285+
- Models with complex initialization dependencies
286+
287+
**Note:** Feast is optimized for feature serving, not large model inference. For production LLM workloads, use specialized model serving platforms.
288+
289+
### Customizing Static Artifacts
290+
291+
To add your own artifacts, modify `static_artifacts.py`:
292+
293+
```python
294+
def load_custom_embeddings():
295+
"""Load pre-computed user embeddings."""
296+
embeddings_file = Path(__file__).parent / "data" / "user_embeddings.npy"
297+
if embeddings_file.exists():
298+
import numpy as np
299+
return {"embeddings": np.load(embeddings_file)}
300+
return None
301+
302+
def load_artifacts(app: FastAPI):
303+
# Load your custom artifacts
304+
app.state.custom_embeddings = load_custom_embeddings()
305+
app.state.config_params = {"threshold": 0.7, "top_k": 10}
306+
307+
# Make them available to feature views
308+
import example_repo
309+
example_repo._custom_embeddings = app.state.custom_embeddings
310+
```
311+
208312
## 📚 Detailed Usage
209313

210314
### 1. Feature Store Setup
@@ -409,20 +513,21 @@ def toxicity_detection(inputs: pd.DataFrame) -> pd.DataFrame:
409513
### Performance Optimization
410514

411515
**Current Architecture:**
412-
- Models load on each request (see `sentiment_prediction` function)
516+
- **Static artifacts loading** at server startup (see `static_artifacts.py`)
517+
- **Pre-loaded models** cached in memory for fast inference
413518
- CPU-only operation to avoid multiprocessing issues
414519
- SQLite-based storage for fast local access
415520

416-
**TODO: Optimization Opportunities:**
417-
- **Startup-time Model Loading**: Load models once at server startup instead of per-request
418-
- **Custom Provider**: Implement model caching via custom Feast provider
419-
- **Model Serving Layer**: Use dedicated model servers (TorchServe, MLflow) for heavy models
521+
**Implemented Optimizations:**
522+
- **Startup-time Model Loading**: ✅ Models load once at server startup via `static_artifacts.py`
523+
- **Memory-efficient Caching**: ✅ Models stored in `app.state` and accessed via global references
524+
- **Fallback Handling**: ✅ Graceful degradation when artifacts fail to load
420525

421-
**Production Optimizations:**
422-
1. **Model Caching**: Cache loaded models in memory to avoid repeated loading
423-
2. **Batch Inference**: Process multiple texts together for efficiency
424-
3. **Feature Materialization**: Pre-compute expensive features offline
425-
4. **Async Processing**: Use async patterns for real-time serving
526+
**Additional Production Optimizations:**
527+
1. **Batch Inference**: Process multiple texts together for efficiency
528+
2. **Feature Materialization**: Pre-compute expensive features offline
529+
3. **Async Processing**: Use async patterns for real-time serving
530+
4. **Model Serving Layer**: Use dedicated model servers (TorchServe, vLLM) for large models
426531

427532
### Production Configuration Examples
428533

sdk/python/feast/templates/pytorch_nlp/feature_repo/example_repo.py

Lines changed: 75 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,18 @@
2525
from feast.on_demand_feature_view import on_demand_feature_view
2626
from feast.types import Array, Float32, Int64, String
2727

28+
try:
29+
# Import static artifacts helpers (available when feature server loads artifacts)
30+
from static_artifacts import get_sentiment_model, get_lookup_tables
31+
except ImportError:
32+
# Fallback for when static_artifacts.py is not available
33+
get_sentiment_model = None
34+
get_lookup_tables = None
35+
36+
# Global references for static artifacts (set by feature server)
37+
_sentiment_model = None
38+
_lookup_tables = {}
39+
2840
# Configuration
2941
repo_path = Path(__file__).parent
3042
data_path = repo_path / "data"
@@ -143,100 +155,87 @@
143155
)
144156
def sentiment_prediction(inputs: pd.DataFrame) -> pd.DataFrame:
    """
    Real-time sentiment prediction using pre-loaded static artifacts.

    This function demonstrates how to use static artifacts (pre-loaded models,
    lookup tables) for efficient real-time inference. Models are loaded once
    at feature server startup rather than on each request.

    Args:
        inputs: DataFrame with an ``input_text`` column of texts to score.

    Returns:
        DataFrame with one row per input text and columns:
        ``predicted_sentiment``, ``sentiment_confidence``, ``positive_prob``,
        ``negative_prob``, ``neutral_prob``, ``text_embedding`` (384 floats).
        Neutral dummy values are returned whenever the model or numpy is
        unavailable, so the feature view never fails outright.
    """
    try:
        import numpy as np
    except ImportError:
        # Fallback to dummy predictions if numpy isn't available.
        # (Plain Python lists only — no numpy types on this path.)
        df = pd.DataFrame()
        df["predicted_sentiment"] = ["neutral"] * len(inputs)
        df["sentiment_confidence"] = [0.5] * len(inputs)
        df["positive_prob"] = [0.33] * len(inputs)
        df["negative_prob"] = [0.33] * len(inputs)
        df["neutral_prob"] = [0.34] * len(inputs)
        df["text_embedding"] = [[0.0] * 384] * len(inputs)
        return df

    # Get pre-loaded static artifacts from global references.
    # These are loaded once at startup via static_artifacts.py.
    global _sentiment_model, _lookup_tables

    sentiment_model = _sentiment_model
    lookup_tables = _lookup_tables

    # Use lookup table for label mapping (from static artifacts); the
    # default covers the RoBERTa-style LABEL_0/1/2 convention.
    label_map = lookup_tables.get("sentiment_labels", {
        "LABEL_0": "negative",
        "LABEL_1": "neutral",
        "LABEL_2": "positive",
    })

    results = []

    for text in inputs["input_text"]:
        try:
            if sentiment_model is not None:
                # Use pre-loaded model for prediction
                predictions = sentiment_model(text)

                # Parse results using static lookup tables
                scores = {
                    label_map.get(pred["label"], pred["label"]): pred["score"]
                    for pred in predictions
                }

                # Get best prediction
                best_pred = max(predictions, key=lambda x: x["score"])
                predicted_sentiment = label_map.get(
                    best_pred["label"], best_pred["label"]
                )
                confidence = best_pred["score"]
            else:
                # Fallback when model is not available
                predicted_sentiment = "neutral"
                confidence = 0.5
                scores = {"positive": 0.33, "negative": 0.33, "neutral": 0.34}

            # Generate dummy embeddings (in production, use pre-loaded
            # embeddings). NOTE(review): np.random.rand makes this output
            # non-deterministic — acceptable for a demo template only.
            embedding = np.random.rand(384).tolist()

            results.append({
                "predicted_sentiment": predicted_sentiment,
                "sentiment_confidence": np.float32(confidence),
                "positive_prob": np.float32(scores.get("positive", 0.0)),
                "negative_prob": np.float32(scores.get("negative", 0.0)),
                "neutral_prob": np.float32(scores.get("neutral", 0.0)),
                "text_embedding": [np.float32(x) for x in embedding],
            })

        except Exception:
            # Fallback for individual text processing errors: one bad text
            # must not poison the whole batch.
            results.append({
                "predicted_sentiment": "neutral",
                "sentiment_confidence": np.float32(0.5),
                "positive_prob": np.float32(0.33),
                "negative_prob": np.float32(0.33),
                "neutral_prob": np.float32(0.34),
                "text_embedding": [np.float32(0.0)] * 384,
            })

    return pd.DataFrame(results)
242241

0 commit comments

Comments
 (0)