Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
4956a51
add test and fix for switch of ground truth and predictions
LennartPurucker Feb 20, 2023
fc642c1
undo import optimization
LennartPurucker Feb 20, 2023
2da1109
fix bug with model passing to function
LennartPurucker Feb 20, 2023
0583668
fix order in other tests
LennartPurucker Feb 20, 2023
1fe8bc9
Merge branch 'openml:develop' into develop
LennartPurucker Feb 20, 2023
14cbd04
update progress.rst
LennartPurucker Feb 21, 2023
ceb1d53
new unit test for run consistency and bug fixed
LennartPurucker Feb 21, 2023
37500a7
clarify new assert
LennartPurucker Feb 21, 2023
921cf10
Merge pull request #1 from LennartPurucker/develop_ext
LennartPurucker Feb 22, 2023
3e97992
Merge branch 'openml:develop' into develop
LennartPurucker Feb 22, 2023
9f47b91
minor loop refactor
LennartPurucker Feb 22, 2023
14d4299
Merge remote-tracking branch 'origin/develop' into develop
LennartPurucker Feb 22, 2023
8686317
refactor default to None
LennartPurucker Feb 22, 2023
8adb0bd
directly test prediction data equal
LennartPurucker Feb 23, 2023
04ca611
Update tests/test_runs/test_run.py
LennartPurucker Feb 23, 2023
f996c0a
Merge branch 'develop' into develop
LennartPurucker Feb 23, 2023
3dac7a7
Mark sklearn tests (#1202)
PGijsbers Feb 23, 2023
1bf8c0e
add test and fix for switch of ground truth and predictions
LennartPurucker Feb 20, 2023
74e9c38
undo import optimization
LennartPurucker Feb 20, 2023
794cce8
Merge branch 'develop' of https://github.com/openml/openml-python int…
LennartPurucker Feb 23, 2023
b4c2030
fix mask error resulting from rebase
LennartPurucker Feb 23, 2023
3c5ff3e
make dummy classifier strategy consistent to avoid problems as a resu…
LennartPurucker Feb 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
new unit test for run consistency and bug fixed
  • Loading branch information
LennartPurucker committed Feb 21, 2023
commit ceb1d534e700676dd36aaab236b1f54933a8a3e7
8 changes: 8 additions & 0 deletions openml/runs/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,14 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None):

tags = openml.utils.extract_xml_tags("oml:tag", run)

# Make sure default values are used where needed to keep run objects identical
if not evaluations:
evaluations = None
if not fold_evaluations:
fold_evaluations = None
if not sample_evaluations:
sample_evaluations = None
Comment thread
PGijsbers marked this conversation as resolved.
Outdated

return OpenMLRun(
run_id=run_id,
uploader=uploader,
Expand Down
127 changes: 95 additions & 32 deletions tests/test_runs/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,32 @@ def test_tagging(self):
run_list = openml.runs.list_runs(tag=tag)
self.assertEqual(len(run_list), 0)

def _test_run_obj_equals(self, run, run_prime):
@staticmethod
def _test_prediction_data_equal(run, run_prime):
    """Assert that two runs hold the same prediction data.

    Columns whose ARFF attribute type (as reported by ``run``) is
    ``"NUMERIC"`` are compared approximately after a cast to float; all
    remaining columns are compared for exact equality.
    """
    # Boolean mask over the prediction columns: True where the ARFF type is numeric
    arff_attributes = run._generate_arff_dict()["attributes"]
    is_numeric = np.array([arff_type == "NUMERIC" for _, arff_type in arff_attributes])

    # Read the data through ``.predictions`` for both runs
    # (For run from server, .data_content does not exist)
    values = run.predictions.values
    values_prime = run_prime.predictions.values

    # Split each side into its numeric and non-numeric columns
    floats = np.array(values[:, is_numeric], dtype=float)
    floats_prime = np.array(values_prime[:, is_numeric], dtype=float)
    strings = values[:, ~is_numeric]
    strings_prime = values_prime[:, ~is_numeric]

    np.testing.assert_array_almost_equal(floats, floats_prime)
    np.testing.assert_array_equal(strings, strings_prime)

def _test_run_obj_equals(self, run, run_prime, only_check_prediction_data=False):

if only_check_prediction_data:
# Only check prediction data because other fields may not need to be equal
self._test_prediction_data_equal(run, run_prime)
return

for dictionary in ["evaluations", "fold_evaluations", "sample_evaluations"]:
if getattr(run, dictionary) is not None:
self.assertDictEqual(getattr(run, dictionary), getattr(run_prime, dictionary))
Expand All @@ -51,20 +76,9 @@ def _test_run_obj_equals(self, run, run_prime):
if other is not None:
self.assertDictEqual(other, dict())
self.assertEqual(run._to_xml(), run_prime._to_xml())
self._test_prediction_data_equal(run, run_prime)

# Determine which attributes are numeric and which not
num_cols = np.array(
[d_type == "NUMERIC" for _, d_type in run._generate_arff_dict()["attributes"]]
)

# Assert numeric and string parts separately
numeric_part = np.array(np.array(run.data_content)[:, num_cols], dtype=float)
numeric_part_prime = np.array(np.array(run_prime.data_content)[:, num_cols], dtype=float)
string_part = np.array(run.data_content)[:, ~num_cols]
string_part_prime = np.array(run_prime.data_content)[:, ~num_cols]
np.testing.assert_array_almost_equal(numeric_part, numeric_part_prime)
np.testing.assert_array_equal(string_part, string_part_prime)

# Test trace
if run.trace is not None:
run_trace_content = run.trace.trace_to_arff()["data"]
else:
Expand Down Expand Up @@ -197,6 +211,27 @@ def test_to_from_filesystem_no_model(self):
with self.assertRaises(ValueError, msg="Could not find model.pkl"):
openml.runs.OpenMLRun.from_filesystem(cache_path)

@staticmethod
def _get_models_tasks_for_tests():
    """Return the ``(model, task)`` pairs shared by the run tests.

    Returns
    -------
    list of tuple
        One classification pair and one regression pair, in that order.
    """
    model_clf = Pipeline(
        [
            ("imputer", SimpleImputer(strategy="mean")),
            # Pin the strategy explicitly: the library default is not the same
            # across scikit-learn releases, and a fixed strategy keeps the
            # predictions comparable between runs.
            ("classifier", DummyClassifier(strategy="prior")),
        ]
    )
    model_reg = Pipeline(
        [
            ("imputer", SimpleImputer(strategy="mean")),
            (
                "regressor",
                # LR because dummy does not produce enough float-like values
                LinearRegression(),
            ),
        ]
    )

    task_clf = openml.tasks.get_task(119)  # diabetes; hold out validation
    task_reg = openml.tasks.get_task(733)  # quake; crossvalidation

    return [(model_clf, task_clf), (model_reg, task_reg)]

@staticmethod
def assert_run_prediction_data(task, run, model):
# -- Get y_pred and y_true as it should be stored in the run
Expand Down Expand Up @@ -248,24 +283,7 @@ def test_publish_with_local_loaded_flow(self):
"""
extension = openml.extensions.sklearn.SklearnExtension()

model_clf = Pipeline(
[("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())]
)
model_reg = Pipeline(
[
("imputer", SimpleImputer(strategy="mean")),
(
"regressor",
# LR because dummy does not produce enough float-like values
LinearRegression(),
),
]
)

task_clf = openml.tasks.get_task(119) # diabetes; hold out validation
task_reg = openml.tasks.get_task(733) # quake; crossvalidation

for model, task in [(model_clf, task_clf), (model_reg, task_reg)]:
for model, task in self._get_models_tasks_for_tests():
# Make sure the flow does not exist on the server yet.
flow = extension.model_to_flow(model)
self._add_sentinel_to_flow_name(flow)
Expand All @@ -288,6 +306,8 @@ def test_publish_with_local_loaded_flow(self):
# obtain run from filesystem
loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path)
loaded_run.publish()

# Clean up
TestBase._mark_entity_for_removal("run", loaded_run.run_id)
TestBase.logger.info(
"collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id)
Expand All @@ -297,6 +317,49 @@ def test_publish_with_local_loaded_flow(self):
self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version))
openml.runs.get_run(loaded_run.run_id)

def test_offline_and_online_run_identical(self):
    """Check that a run is unchanged by a filesystem and a server round trip.

    For every (model, task) pair the run is executed locally without
    uploading the flow, stored to and reloaded from disk and compared to
    the original, then published and compared to the copy fetched back
    from the server (prediction data only).
    """
    extension = openml.extensions.sklearn.SklearnExtension()

    for model, task in self._get_models_tasks_for_tests():
        # Make sure the flow does not exist on the server yet.
        flow = extension.model_to_flow(model)
        self._add_sentinel_to_flow_name(flow)
        self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version))

        run = openml.runs.run_flow_on_task(
            flow=flow,
            task=task,
            add_local_measures=False,
            avoid_duplicate_runs=False,
            upload_flow=False,
        )

        # Make sure that the flow has not been uploaded as requested.
        self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version))

        # Load from filesystem
        cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)))
        run.to_filesystem(cache_path)
        loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path)

        # Assert identical for offline - offline
        self._test_run_obj_equals(run, loaded_run)

        # Publish and test for offline - online
        run.publish()
        self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version))

        try:
            online_run = openml.runs.get_run(run.run_id, ignore_cache=True)
            self._test_run_obj_equals(run, online_run, only_check_prediction_data=True)
        finally:
            # Clean up
            TestBase._mark_entity_for_removal("run", run.run_id)
            # Log the id of the run that was published and marked above;
            # ``loaded_run`` is only the on-disk copy and is never published here.
            TestBase.logger.info(
                "collected from {}: {}".format(__file__.split("/")[-1], run.run_id)
            )

def test_run_setup_string_included_in_xml(self):
SETUP_STRING = "setup-string"
run = OpenMLRun(
Expand Down