Skip to content

Commit 47c626a

Browse files
committed
Create ai_workflow_example.py
1 parent ee0851a commit 47c626a

File tree

1 file changed

+284
-0
lines changed

1 file changed

+284
-0
lines changed

examples/ai_workflow_example.py

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Complete example of an AI workflow with ModelSync
4+
"""
5+
6+
import os
7+
import sys
8+
import json
9+
import tempfile
10+
from pathlib import Path
11+
import numpy as np
12+
import pandas as pd
13+
from sklearn.ensemble import RandomForestClassifier
14+
from sklearn.model_selection import train_test_split
15+
from sklearn.metrics import accuracy_score, precision_score, recall_score
16+
17+
# Add project root to Python path
18+
project_root = Path(__file__).parent.parent
19+
sys.path.insert(0, str(project_root))
20+
21+
from modelsync.core.versioning import ModelSyncRepo
22+
from modelsync.storage.dataset_storage import DatasetStorage
23+
from modelsync.storage.model_storage import ModelStorage
24+
from modelsync.experiments.branching import ExperimentManager
25+
from modelsync.pipelines.ml_pipeline import PipelineManager
26+
from modelsync.deployment.continuous_deploy import DeploymentManager
27+
from modelsync.collaboration.audit import CollaborationManager
28+
29+
def create_sample_data(n_samples=1000, n_features=10, dataset_path="sample_dataset.csv"):
    """Create a synthetic binary-classification dataset and save it to CSV.

    Generalized from the original hard-coded demo values; the defaults
    reproduce the original behavior exactly.

    Args:
        n_samples: Number of rows to generate (default 1000).
        n_features: Number of feature columns (default 10).
        dataset_path: Destination CSV file path (default "sample_dataset.csv").

    Returns:
        The path of the CSV file that was written.
    """
    print("📊 Creating sample dataset...")

    # Fixed seed so repeated demo runs produce an identical dataset.
    np.random.seed(42)

    X = np.random.randn(n_samples, n_features)
    # Label depends on the first two features plus small Gaussian noise,
    # so the task is learnable but not trivially separable.
    y = (X[:, 0] + X[:, 1] + np.random.randn(n_samples) * 0.1 > 0).astype(int)

    # Build a DataFrame with named feature columns plus the target column.
    feature_names = [f"feature_{i}" for i in range(n_features)]
    df = pd.DataFrame(X, columns=feature_names)
    df['target'] = y

    df.to_csv(dataset_path, index=False)

    print(f"✅ Dataset created: {dataset_path} ({len(df)} samples, {len(df.columns)-1} features)")
    return dataset_path
53+
def train_model(X_train, X_test, y_train, y_test, hyperparams):
    """Fit a random forest on the training split and score it on the test split.

    Args:
        X_train, X_test: Feature matrices for training and evaluation.
        y_train, y_test: Corresponding label vectors.
        hyperparams: Dict that may contain 'n_estimators' and 'max_depth';
            missing keys fall back to 100 and None respectively.

    Returns:
        A (model, metrics) tuple where metrics maps 'accuracy', 'precision'
        and 'recall' to scores on the held-out test set.
    """
    # Pull hyperparameters out first; random_state is fixed for reproducibility.
    n_estimators = hyperparams.get('n_estimators', 100)
    max_depth = hyperparams.get('max_depth', None)
    classifier = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=42
    )

    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)

    scores = {
        'accuracy': accuracy_score(y_test, predictions),
        'precision': precision_score(y_test, predictions),
        'recall': recall_score(y_test, predictions),
    }

    return classifier, scores
72+
def demonstrate_ai_workflow():
    """Demonstrate a complete AI workflow with ModelSync.

    Runs the whole demo end to end: repository init, dataset versioning,
    experiment branching, model training/versioning, branch comparison,
    deployment rules, collaboration setup, pipeline creation, repository
    status and audit trail.

    Side effects: writes sample_dataset.csv and model_*.pkl into the
    current directory (removed afterwards by cleanup()).
    """
    # Fix: the original imported joblib inside the experiment loop, paying
    # the import-machinery cost on every iteration. Import once up front.
    import joblib

    print("🚀 ModelSync AI Workflow Demo")
    print("=" * 50)

    # 1. Initialize ModelSync repository (idempotent: skip if already set up).
    print("\n1️⃣ Initializing ModelSync repository...")
    repo = ModelSyncRepo()
    if not repo.is_initialized():
        repo.init("AI Researcher", "researcher@example.com")

    # 2. Create and version the dataset.
    print("\n2️⃣ Managing datasets...")
    dataset_path = create_sample_data()

    dataset_storage = DatasetStorage()
    dataset_info = dataset_storage.add_dataset(
        dataset_path=dataset_path,
        dataset_name="Sample Classification Dataset",
        description="Synthetic binary classification dataset",
        tags=["synthetic", "classification", "binary"]
    )
    print(f"✅ Dataset versioned: {dataset_info['name']} ({dataset_info['id'][:8]})")

    # 3. Create experiment branches.
    print("\n3️⃣ Setting up experiment branches...")
    experiment_manager = ExperimentManager()

    branches = ["baseline", "feature_engineering", "hyperparameter_tuning"]
    for branch in branches:
        try:
            experiment_manager.create_branch(branch)
            print(f"✅ Created branch: {branch}")
        except ValueError:
            # create_branch raises ValueError on duplicates — this keeps the
            # demo safely re-runnable.
            print(f"⚠️ Branch {branch} already exists")

    # 4. Run one experiment per branch.
    print("\n4️⃣ Running experiments...")

    # Load dataset and make a fixed train/test split shared by all experiments.
    df = pd.read_csv(dataset_path)
    X = df.drop('target', axis=1)
    y = df['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # One hyperparameter configuration per branch.
    experiments = {
        "baseline": {
            "hyperparams": {"n_estimators": 100, "max_depth": None},
            "description": "Baseline Random Forest"
        },
        "feature_engineering": {
            "hyperparams": {"n_estimators": 150, "max_depth": 10},
            "description": "Feature engineering with more trees"
        },
        "hyperparameter_tuning": {
            "hyperparams": {"n_estimators": 200, "max_depth": 15},
            "description": "Tuned hyperparameters"
        }
    }

    model_storage = ModelStorage()

    for branch_name, config in experiments.items():
        print(f"\n🔬 Running experiment on branch: {branch_name}")

        # Train model for this branch's configuration.
        model, metrics = train_model(X_train, X_test, y_train, y_test, config["hyperparams"])

        # Persist the fitted model, then register it with the model store.
        model_path = f"model_{branch_name}.pkl"
        joblib.dump(model, model_path)

        model_info = model_storage.add_model(
            model_path=model_path,
            model_name=f"RF_{branch_name}",
            framework="sklearn",
            metrics=metrics,
            hyperparameters=config["hyperparams"],
            training_info={
                "train_size": len(X_train),
                "test_size": len(X_test),
                "features": list(X.columns)
            }
        )

        # Record the experiment on its branch (skip silently if the branch
        # lookup fails).
        branch = experiment_manager.get_branch(branch_name)
        if branch:
            experiment_data = branch.add_experiment(
                experiment_name=f"exp_{branch_name}",
                model_id=model_info["id"],
                dataset_id=dataset_info["id"],
                hyperparameters=config["hyperparams"],
                metrics=metrics,
                description=config["description"]
            )
            print(f"✅ Experiment added: {experiment_data['name']}")
            print(f" Metrics: {metrics}")

    # 5. Compare experiments across branches by accuracy.
    print("\n5️⃣ Comparing experiments...")
    comparison = experiment_manager.compare_branches(branches, "accuracy")

    if "error" not in comparison:
        print(f"🏆 Best branch: {comparison['best_branch']}")
        print(f"📊 Comparison results:")
        for branch_data in comparison["branches"]:
            print(f" • {branch_data['name']}: {branch_data['avg_metric_value']:.4f} accuracy")

    # 6. Setup deployment rules.
    print("\n6️⃣ Setting up deployment...")
    deployment_manager = DeploymentManager()

    # Auto-deploy the tuned branch whenever accuracy exceeds the threshold.
    deployment_manager.add_deployment_rule(
        name="high_accuracy_deploy",
        branch="hyperparameter_tuning",
        metric_name="accuracy",
        threshold=0.85,
        operator="greater_than",
        deployment_target="docker",
        deployment_config={
            "image_name": "modelsync-demo",
            "port": "8000"
        }
    )
    print("✅ Deployment rule added")

    # 7. Setup collaboration: one user per role.
    print("\n7️⃣ Setting up collaboration...")
    collaboration_manager = CollaborationManager()

    collaboration_manager.add_user("alice", "alice@example.com", "admin")
    collaboration_manager.add_user("bob", "bob@example.com", "contributor")
    collaboration_manager.add_user("charlie", "charlie@example.com", "viewer")

    print("✅ Team members added")

    # 8. Create an ML pipeline.
    print("\n8️⃣ Creating ML pipeline...")
    pipeline_manager = PipelineManager()

    pipeline = pipeline_manager.create_pipeline("classification_pipeline")

    # Placeholder steps — the demo only shows pipeline wiring, not real work.
    def preprocess_data(data):
        return data  # Placeholder

    def train_model_step(data, **params):
        return data  # Placeholder

    pipeline.add_step("preprocess", "data_preprocessing", preprocess_data, {}, "custom")
    pipeline.add_step("train", "model_training", train_model_step, {}, "custom")

    print("✅ ML pipeline created")

    # 9. Show repository status.
    print("\n9️⃣ Repository status...")
    status = repo.status()
    print(f"📊 Branch: {status['branch']}")
    print(f"📁 Tracked files: {status['total_tracked']}")
    print(f"📋 Staged files: {status['total_staged']}")

    # 10. Show the five most recent audit-trail entries.
    print("\n🔟 Audit trail...")
    audit_log = collaboration_manager.audit_log
    recent_actions = audit_log.get_audit_trail()[:5]

    if recent_actions:
        print("📝 Recent actions:")
        for action in recent_actions:
            print(f" • {action['action']} by {action['user']} at {action['timestamp']}")

    print("\n🎉 AI Workflow Demo completed successfully!")
    print("\n📚 What was demonstrated:")
    print(" ✅ Dataset versioning with deduplication")
    print(" ✅ Model versioning with checkpoints")
    print(" ✅ Experiment branching and comparison")
    print(" ✅ ML pipeline creation")
    print(" ✅ Deployment rules setup")
    print(" ✅ Collaboration and audit logging")
    print(" ✅ Complete AI project versioning")

    print("\n🚀 Next steps:")
    print(" • Start web interface: modelsync web")
    print(" • View experiments: modelsync experiment list")
    print(" • Check models: modelsync model list")
    print(" • View datasets: modelsync dataset list")
265+
def cleanup():
    """Remove the artifacts written by the demo (dataset CSV + model pickles).

    Missing files are skipped, so this is safe to call even after a
    partial or failed run.
    """
    print("\n🧹 Cleaning up demo files...")
    generated = (
        "sample_dataset.csv",
        "model_baseline.pkl",
        "model_feature_engineering.pkl",
        "model_hyperparameter_tuning.pkl",
    )

    for path in generated:
        if os.path.exists(path):
            os.remove(path)
            print(f" Removed: {path}")
280+
if __name__ == "__main__":
    # Run the demo; the finally clause guarantees generated files are
    # removed even if the workflow raises partway through.
    try:
        demonstrate_ai_workflow()
    finally:
        cleanup()

0 commit comments

Comments
 (0)