#!/usr/bin/env python3
"""Complete example of an AI workflow with ModelSync.

The script generates a synthetic dataset, versions it, trains one
RandomForest model per experiment branch, compares the branches, and then
sets up a deployment rule, team members and a placeholder ML pipeline.
Demo artifacts written to the current working directory are removed when
the script exits.
"""

import os
import sys
import json
import tempfile
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Add project root to Python path. This is done before the ``modelsync``
# imports below, which presumably rely on it when the script is run directly.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

from modelsync.core.versioning import ModelSyncRepo  # noqa: E402
from modelsync.storage.dataset_storage import DatasetStorage  # noqa: E402
from modelsync.storage.model_storage import ModelStorage  # noqa: E402
from modelsync.experiments.branching import ExperimentManager  # noqa: E402
from modelsync.pipelines.ml_pipeline import PipelineManager  # noqa: E402
from modelsync.deployment.continuous_deploy import DeploymentManager  # noqa: E402
from modelsync.collaboration.audit import CollaborationManager  # noqa: E402


# File the synthetic dataset is written to (relative to the working directory).
DATASET_PATH = "sample_dataset.csv"

# One entry per experiment branch. The keys double as branch names and are
# also used to derive the model file names, so the code that creates the
# artifacts and the code that cleans them up cannot drift apart.
EXPERIMENTS = {
    "baseline": {
        "hyperparams": {"n_estimators": 100, "max_depth": None},
        "description": "Baseline Random Forest",
    },
    "feature_engineering": {
        "hyperparams": {"n_estimators": 150, "max_depth": 10},
        "description": "Feature engineering with more trees",
    },
    "hyperparameter_tuning": {
        "hyperparams": {"n_estimators": 200, "max_depth": 15},
        "description": "Tuned hyperparameters",
    },
}


def _model_path(branch_name):
    """Return the file name the model trained on *branch_name* is saved to."""
    return f"model_{branch_name}.pkl"


def create_sample_data():
    """Create sample dataset for demonstration.

    Draws 1000 samples of 10 standard-normal features with a fixed seed and
    labels a sample 1 when ``feature_0 + feature_1`` plus a small Gaussian
    noise term is positive, else 0. The result is written as CSV to
    ``DATASET_PATH``.

    Returns:
        The path of the CSV file that was written.
    """
    print("📊 Creating sample dataset...")

    # Generate synthetic data (seeded so every run produces the same file)
    np.random.seed(42)
    n_samples = 1000
    n_features = 10

    X = np.random.randn(n_samples, n_features)
    y = (X[:, 0] + X[:, 1] + np.random.randn(n_samples) * 0.1 > 0).astype(int)

    # Create DataFrame
    feature_names = [f"feature_{i}" for i in range(n_features)]
    df = pd.DataFrame(X, columns=feature_names)
    df['target'] = y

    # Save dataset
    dataset_path = DATASET_PATH
    df.to_csv(dataset_path, index=False)

    print(f"✅ Dataset created: {dataset_path} ({len(df)} samples, {len(df.columns)-1} features)")
    return dataset_path


def train_model(X_train, X_test, y_train, y_test, hyperparams):
    """Train a model with given hyperparameters.

    Args:
        X_train: Training features.
        X_test: Held-out features used for evaluation.
        y_train: Training labels.
        y_test: Held-out labels used for evaluation.
        hyperparams: Mapping that may contain ``n_estimators`` (default 100)
            and ``max_depth`` (default ``None``).

    Returns:
        A ``(model, metrics)`` tuple where ``model`` is the fitted
        ``RandomForestClassifier`` and ``metrics`` is a dict with the keys
        ``accuracy``, ``precision`` and ``recall`` computed on the test split.
    """
    model = RandomForestClassifier(
        n_estimators=hyperparams.get('n_estimators', 100),
        max_depth=hyperparams.get('max_depth'),
        random_state=42,
    )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    metrics = {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
    }

    return model, metrics


def _init_repository():
    """Step 1: return a ModelSync repository, initializing it if needed."""
    print("\n1️⃣ Initializing ModelSync repository...")
    repo = ModelSyncRepo()
    if not repo.is_initialized():
        repo.init("AI Researcher", "researcher@example.com")
    return repo


def _version_dataset():
    """Step 2: create the sample dataset and register it in dataset storage.

    Returns:
        A ``(dataset_path, dataset_info)`` tuple; ``dataset_info`` is whatever
        ``DatasetStorage.add_dataset`` returned.
    """
    print("\n2️⃣ Managing datasets...")
    dataset_path = create_sample_data()

    dataset_storage = DatasetStorage()
    dataset_info = dataset_storage.add_dataset(
        dataset_path=dataset_path,
        dataset_name="Sample Classification Dataset",
        description="Synthetic binary classification dataset",
        tags=["synthetic", "classification", "binary"],
    )
    print(f"✅ Dataset versioned: {dataset_info['name']} ({dataset_info['id'][:8]})")
    return dataset_path, dataset_info


def _create_experiment_branches(experiment_manager, branches):
    """Step 3: create each branch, tolerating branches that already exist."""
    print("\n3️⃣ Setting up experiment branches...")
    for branch in branches:
        try:
            experiment_manager.create_branch(branch)
        except ValueError:
            # The original demo treats ValueError as "branch already exists".
            print(f"⚠️ Branch {branch} already exists")
        else:
            print(f"✅ Created branch: {branch}")


def _run_experiments(experiment_manager, dataset_path, dataset_info):
    """Step 4: train, store and record one model per entry in EXPERIMENTS."""
    print("\n4️⃣ Running experiments...")

    # Load dataset
    df = pd.read_csv(dataset_path)
    X = df.drop('target', axis=1)
    y = df['target']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model_storage = ModelStorage()

    for branch_name, config in EXPERIMENTS.items():
        print(f"\n🔬 Running experiment on branch: {branch_name}")

        # Train model
        model, metrics = train_model(
            X_train, X_test, y_train, y_test, config["hyperparams"]
        )

        # Save model
        model_path = _model_path(branch_name)
        joblib.dump(model, model_path)

        model_info = model_storage.add_model(
            model_path=model_path,
            model_name=f"RF_{branch_name}",
            framework="sklearn",
            metrics=metrics,
            hyperparameters=config["hyperparams"],
            training_info={
                "train_size": len(X_train),
                "test_size": len(X_test),
                "features": list(X.columns),
            },
        )

        # Add experiment to branch
        branch = experiment_manager.get_branch(branch_name)
        if not branch:
            # Make the skip visible instead of silently dropping the experiment.
            print(f"⚠️ Branch {branch_name} not found; experiment not recorded")
            continue

        experiment_data = branch.add_experiment(
            experiment_name=f"exp_{branch_name}",
            model_id=model_info["id"],
            dataset_id=dataset_info["id"],
            hyperparameters=config["hyperparams"],
            metrics=metrics,
            description=config["description"],
        )
        print(f"✅ Experiment added: {experiment_data['name']}")
        print(f" Metrics: {metrics}")


def _report_comparison(experiment_manager, branches):
    """Step 5: compare the branches on accuracy and print the outcome."""
    print("\n5️⃣ Comparing experiments...")
    comparison = experiment_manager.compare_branches(branches, "accuracy")
    if "error" in comparison:
        # Surface the failure rather than printing nothing at all.
        print(f"⚠️ Could not compare branches: {comparison['error']}")
        return

    print(f"🏆 Best branch: {comparison['best_branch']}")
    print("📊 Comparison results:")
    for branch_data in comparison["branches"]:
        print(f" • {branch_data['name']}: {branch_data['avg_metric_value']:.4f} accuracy")


def _setup_deployment():
    """Step 6: register a deployment rule for the hyperparameter_tuning branch."""
    print("\n6️⃣ Setting up deployment...")
    deployment_manager = DeploymentManager()

    # The rule is pinned to the "hyperparameter_tuning" branch and is keyed on
    # accuracy with a threshold of 0.85.
    deployment_manager.add_deployment_rule(
        name="high_accuracy_deploy",
        branch="hyperparameter_tuning",
        metric_name="accuracy",
        threshold=0.85,
        operator="greater_than",
        deployment_target="docker",
        deployment_config={
            "image_name": "modelsync-demo",
            "port": "8000",
        },
    )
    print("✅ Deployment rule added")


def _setup_collaboration():
    """Step 7: add the demo team members and return the collaboration manager."""
    print("\n7️⃣ Setting up collaboration...")
    collaboration_manager = CollaborationManager()

    # Add team members
    collaboration_manager.add_user("alice", "alice@example.com", "admin")
    collaboration_manager.add_user("bob", "bob@example.com", "contributor")
    collaboration_manager.add_user("charlie", "charlie@example.com", "viewer")
    print("✅ Team members added")
    return collaboration_manager


def _create_pipeline():
    """Step 8: create a two-step pipeline whose steps are pass-through stubs."""
    print("\n8️⃣ Creating ML pipeline...")
    pipeline_manager = PipelineManager()
    pipeline = pipeline_manager.create_pipeline("classification_pipeline")

    # Add pipeline steps (simplified for demo)
    def preprocess_data(data):
        return data  # Placeholder

    def train_model_step(data, **params):
        return data  # Placeholder

    pipeline.add_step("preprocess", "data_preprocessing", preprocess_data, {}, "custom")
    pipeline.add_step("train", "model_training", train_model_step, {}, "custom")
    print("✅ ML pipeline created")


def _show_repository_status(repo):
    """Step 9: print the branch and file counts reported by ``repo.status()``."""
    print("\n9️⃣ Repository status...")
    status = repo.status()
    print(f"📊 Branch: {status['branch']}")
    print(f"📁 Tracked files: {status['total_tracked']}")
    print(f"📋 Staged files: {status['total_staged']}")


def _show_audit_trail(collaboration_manager):
    """Step 10: print the first five entries of the audit trail, if any."""
    print("\n🔟 Audit trail...")
    audit_log = collaboration_manager.audit_log
    recent_actions = audit_log.get_audit_trail()[:5]
    if recent_actions:
        print("📝 Recent actions:")
        for action in recent_actions:
            print(f" • {action['action']} by {action['user']} at {action['timestamp']}")


def _print_summary():
    """Print the closing summary and suggested next steps."""
    print("\n🎉 AI Workflow Demo completed successfully!")
    print("\n📚 What was demonstrated:")
    print(" ✅ Dataset versioning with deduplication")
    print(" ✅ Model versioning with checkpoints")
    print(" ✅ Experiment branching and comparison")
    print(" ✅ ML pipeline creation")
    print(" ✅ Deployment rules setup")
    print(" ✅ Collaboration and audit logging")
    print(" ✅ Complete AI project versioning")

    print("\n🚀 Next steps:")
    print(" • Start web interface: modelsync web")
    print(" • View experiments: modelsync experiment list")
    print(" • Check models: modelsync model list")
    print(" • View datasets: modelsync dataset list")


def demonstrate_ai_workflow():
    """Demonstrate complete AI workflow with ModelSync.

    Runs the ten demo steps in order: repository init, dataset versioning,
    branch creation, experiments, branch comparison, deployment rule,
    collaboration setup, pipeline creation, repository status and audit
    trail. Progress is reported on stdout. The dataset CSV and one model
    file per experiment are written to the current working directory.
    """
    print("🚀 ModelSync AI Workflow Demo")
    print("=" * 50)

    branches = list(EXPERIMENTS)

    repo = _init_repository()
    dataset_path, dataset_info = _version_dataset()

    experiment_manager = ExperimentManager()
    _create_experiment_branches(experiment_manager, branches)
    _run_experiments(experiment_manager, dataset_path, dataset_info)
    _report_comparison(experiment_manager, branches)

    _setup_deployment()
    collaboration_manager = _setup_collaboration()
    _create_pipeline()

    _show_repository_status(repo)
    _show_audit_trail(collaboration_manager)
    _print_summary()


def cleanup():
    """Clean up demo files.

    Removes the dataset CSV and the per-branch model files from the current
    working directory. Files that do not exist are skipped silently. Any
    other ``OSError`` is reported and skipped, because this function runs in
    a ``finally`` clause and raising here would obscure the workflow's own
    exception.
    """
    print("\n🧹 Cleaning up demo files...")

    demo_files = [DATASET_PATH, *(_model_path(name) for name in EXPERIMENTS)]

    for path in demo_files:
        try:
            os.remove(path)
        except FileNotFoundError:
            continue
        except OSError as err:
            print(f" Could not remove {path}: {err}")
            continue
        print(f" Removed: {path}")


if __name__ == "__main__":
    try:
        demonstrate_ai_workflow()
    finally:
        cleanup()