-
-
Notifications
You must be signed in to change notification settings - Fork 271
Expand file tree
/
Copy pathsimple_flows_and_runs_tutorial.py
More file actions
122 lines (104 loc) · 3.73 KB
/
simple_flows_and_runs_tutorial.py
File metadata and controls
122 lines (104 loc) · 3.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# %% [markdown]
# A simple tutorial on how to upload results from a machine learning experiment to OpenML.
# %%
import sklearn
from sklearn.neighbors import KNeighborsClassifier
import openml
# %% [markdown]
# <div class="admonition warning">
# <p class="admonition-title">Warning</p>
# <p>
# This example uploads data. For that reason, this example connects to the
# test server at <a href="https://test.openml.org"
# target="_blank">test.openml.org</a>.<br>
# This prevents the main server from becoming overloaded with example datasets, tasks,
# runs, and other submissions.<br>
# Using this test server may affect the behavior and performance of the
# OpenML-Python API.
# </p>
# </div>
# %%
# Switch the OpenML client to its example configuration so that everything
# uploaded below goes to the test server instead of the main server.
openml.config.start_using_configuration_for_example()
# %% [markdown]
# ## Train a machine learning model and evaluate it
# NOTE: We are using task 119 from the test server (dataset page: https://test.openml.org/d/20)
# %%
# Fetch the task; it provides both the dataset and the train/test split.
task = openml.tasks.get_task(119)

# Load the features and the target column of the task's dataset.
dataset = task.get_dataset()
X, y, categorical_indicator, attribute_names = dataset.get_data(
    target=dataset.default_target_attribute
)

# Get the holdout split from the task and slice features and target with it.
train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0)
X_train, y_train = X.iloc[train_indices], y.iloc[train_indices]
X_test, y_test = X.iloc[test_indices], y.iloc[test_indices]

# The hyperparameters live in a dict because the same values are reused later
# as metadata for the flow and the run.
knn_parameters = {"n_neighbors": 3}
clf = KNeighborsClassifier(**knn_parameters).fit(X_train, y_train)

# Get experiment results: predicted labels and per-class probabilities.
y_pred, y_pred_proba = clf.predict(X_test), clf.predict_proba(X_test)
# %% [markdown]
# ## Upload the machine learning experiments to OpenML
# First, create a flow and fill it with metadata about the machine learning model.
# %%
# The scikit-learn version is recorded both as the external version and as the
# dependency string of the flow.
sklearn_version = f"{sklearn.__version__}"

# Hyperparameter values are handed to the flow as strings, with a short
# description and data type for each of them.
flow_parameters = {name: str(value) for name, value in knn_parameters.items()}
flow_parameters_meta_info = {
    "n_neighbors": {"description": "number of neighbors to use", "data_type": "int"},
}

knn_flow = openml.flows.OpenMLFlow(
    # Metadata
    model=clf,  # or None, if you do not want to upload the model object.
    name="CustomKNeighborsClassifier",
    description="A custom KNeighborsClassifier flow for OpenML.",
    external_version=sklearn_version,
    language="English",
    tags=["openml_tutorial_knn"],
    dependencies=sklearn_version,
    # Hyperparameters
    parameters=flow_parameters,
    parameters_meta_info=flow_parameters_meta_info,
    # If you have a pipeline with subcomponents, such as preprocessing, add them here.
    components={},
)

# Upload the flow; the ID printed below is read from the flow object afterwards.
knn_flow.publish()
print(f"knn_flow was published with the ID {knn_flow.flow_id}")
# %% [markdown]
# Second, we create a run to store the results associated with the flow.
# %%
# Format the predictions for OpenML: one formatted row per test sample.
# repeat=0 and fold=0 match the split requested from the task earlier.
# NOTE(review): the probabilities are paired with task.class_labels by
# position; this assumes predict_proba's column order matches — confirm.
predictions = [
    openml.runs.functions.format_prediction(
        task=task,
        repeat=0,
        fold=0,
        index=row_index,
        prediction=predicted_label,
        truth=true_label,
        proba=dict(zip(task.class_labels, class_probabilities, strict=False)),
    )
    for row_index, true_label, predicted_label, class_probabilities in zip(
        test_indices, y_test, y_pred, y_pred_proba, strict=False
    )
]

# Format the parameters for OpenML: each hyperparameter is tied to the
# published flow through its ID.
oml_knn_parameters = [
    {"oml:name": name, "oml:value": value, "oml:component": knn_flow.flow_id}
    for name, value in knn_parameters.items()
]

# Assemble the run from the task, flow and dataset IDs plus the results.
run_tags = ["openml_tutorial_knn"]
knn_run = openml.runs.OpenMLRun(
    task_id=task.task_id,
    flow_id=knn_flow.flow_id,
    dataset_id=dataset.dataset_id,
    parameter_settings=oml_knn_parameters,
    data_content=predictions,
    tags=run_tags,
    description_text="Run generated by the tutorial.",
)

# Upload the run and report where the run and its flow can be found.
knn_run = knn_run.publish()
print(f"Run was uploaded to {knn_run.openml_url}")
print(f"The flow can be found at {knn_run.flow.openml_url}")
# %%
# Leave the example configuration that was enabled at the start of this script.
openml.config.stop_using_configuration_for_example()