61 commits
b45f6f2
Adding importable helper functions
Neeratyoy Oct 29, 2020
8e7ea0b
Changing import of cat, cont
Neeratyoy Oct 29, 2020
102a084
Merge branch 'develop' into fix_773
Neeratyoy Oct 29, 2020
18a2dba
Better docstrings
Neeratyoy Oct 30, 2020
381c267
Adding unit test to check ColumnTransformer
Neeratyoy Oct 30, 2020
5dbff2e
Refinements from @mfeurer
Neeratyoy Nov 2, 2020
fc4ec73
Editing example to support both NumPy and Pandas
Neeratyoy Nov 2, 2020
8d5cad9
Merge branch 'develop' into fix_773
Neeratyoy Nov 3, 2020
3d66404
Merge branch 'develop' into fix_773
Neeratyoy Nov 4, 2020
90c8de6
Unit test fix to mark for deletion
Neeratyoy Nov 4, 2020
e0af15e
Making some unit tests work
Neeratyoy Nov 10, 2020
14aa11d
Waiting for dataset to be processed
Neeratyoy Nov 16, 2020
31d48d8
Minor test collection fix
Neeratyoy Nov 16, 2020
431447c
Template to handle missing tasks
Neeratyoy Nov 30, 2020
cc3199e
Accounting for more missing tasks:
Neeratyoy Nov 30, 2020
8a29668
Fixing some more unit tests
Neeratyoy Nov 30, 2020
405e03c
Simplifying check_task_existence
Neeratyoy Nov 30, 2020
caf4f46
black changes
Neeratyoy Dec 4, 2020
b308e71
Minor formatting
Neeratyoy Dec 8, 2020
436a9fe
Handling task exists check
Neeratyoy Dec 9, 2020
ddd8b04
Testing edited check task func
Neeratyoy Dec 14, 2020
74ae622
Merge branch 'fix_unit_tests' of https://github.com/openml/openml-pyt…
Neeratyoy Dec 14, 2020
50ce90e
Flake fix
Neeratyoy Dec 15, 2020
aea2832
Updating with fixed unit tests from PR #1000
Neeratyoy Dec 15, 2020
56cd639
More retries on connection error
Neeratyoy Dec 16, 2020
8e8ea2e
Adding max_retries to config default
Neeratyoy Dec 17, 2020
d518beb
Update database retry unit test
Neeratyoy Dec 17, 2020
37d9f6b
Print to debug hash exception
Neeratyoy Dec 17, 2020
9bd4892
Fixing checksum unit test
Neeratyoy Dec 17, 2020
dc41b5d
Retry on _download_text_file
Neeratyoy Dec 18, 2020
396cb8d
Update datasets_tutorial.py
mfeurer Dec 21, 2020
8f380de
Update custom_flow_tutorial.py
mfeurer Dec 21, 2020
bc1745e
Update test_study_functions.py
mfeurer Dec 21, 2020
d95b5e6
Update test_dataset_functions.py
mfeurer Dec 21, 2020
d58ca5a
Merge branch 'fix_unit_tests' into fix_773
Neeratyoy Dec 21, 2020
91c6cf5
more retries, but also more time between retries
mfeurer Dec 21, 2020
b43a0e0
Merge branch 'fix_unit_tests' of https://github.com/openml/openml-pyt…
Neeratyoy Dec 21, 2020
a9430b3
allow for even more retries on get calls
mfeurer Dec 21, 2020
e9cfba8
Catching failed get task
Neeratyoy Dec 21, 2020
c13f6ce
Merge branch 'fix_unit_tests' of https://github.com/openml/openml-pyt…
Neeratyoy Dec 21, 2020
3d7abc2
undo stupid change
mfeurer Dec 21, 2020
94576b1
Merge branch 'fix_unit_tests' of https://github.com/openml/openml-pyt…
Neeratyoy Dec 21, 2020
b5e1242
fix one more test
mfeurer Dec 21, 2020
d764aad
Merge branch 'fix_unit_tests' into fix_773
Neeratyoy Dec 21, 2020
f5e4a3e
Refactoring md5 hash check inside _send_request
Neeratyoy Dec 21, 2020
c065dfc
Merge branch 'fix_unit_tests' into fix_773
Neeratyoy Dec 21, 2020
07ce722
Fixing a fairly common unit test fail
Neeratyoy Dec 22, 2020
82e1b72
Reverting loose check on unit test
Neeratyoy Dec 23, 2020
936c252
Merge branch 'fix_unit_tests' into fix_773
Neeratyoy Dec 23, 2020
fc8b464
Merge branch 'develop' into fix_773
PGijsbers Dec 24, 2020
7ef965b
Updating examples to run on sklearn 0.24
Jan 8, 2021
8f693e4
Spawning tests for sklearn 0.24
Jan 8, 2021
9198489
Adding numpy import
Jan 8, 2021
46ab043
Fixing integer type check to allow np.integer
Neeratyoy Jan 22, 2021
c892b6b
Making unit tests run on sklearn 0.24
Neeratyoy Jan 22, 2021
ac173aa
black fix
Neeratyoy Jan 25, 2021
1be82c3
Trying to loosen check on unit test as fix
Neeratyoy Jan 25, 2021
902cd3f
Updating with PR #982
Neeratyoy Jan 26, 2021
0e44a0b
Merge branch 'develop' into sklearn24-support
Neeratyoy Jan 28, 2021
2fd4849
simplify examples
mfeurer Jan 28, 2021
0ae7075
disable test for old python version
mfeurer Jan 28, 2021
Testing edited check task func
Neeratyoy committed Dec 14, 2020
commit ddd8b04f59669346c857002bd76e24f086333810
57 changes: 55 additions & 2 deletions openml/testing.py
@@ -6,9 +6,10 @@
import shutil
import sys
import time
from typing import Dict
from typing import Dict, Union, cast
import unittest
import warnings
import pandas as pd

# Currently, importing oslo raises a lot of warnings that it will stop working
# under python3.8; remove this once they disappear
@@ -252,6 +253,58 @@ def _check_fold_timing_evaluations(
self.assertLessEqual(evaluation, max_val)


def check_task_existence(
task_type: TaskType, dataset_id: int, target_name: str, **kwargs
) -> Union[int, None]:
"""Checks if any task with exists on test server that matches the meta data.

Parameters
----------
task_type : openml.tasks.TaskType
ID of the task type as detailed `here <https://www.openml.org/search?type=task_type>`_.
- Supervised classification: 1
- Supervised regression: 2
- Learning curve: 3
- Supervised data stream classification: 4
- Clustering: 5
- Machine Learning Challenge: 6
- Survival Analysis: 7
- Subgroup Discovery: 8
dataset_id : int
ID of the dataset on which the task is defined.
target_name : str
Name of the target feature of the task.
**kwargs
Any further task attributes to match, e.g. estimation_procedure_id or class_labels.

Returns
-------
int or None
The ID of a matching task if one exists, otherwise None.
"""
return_val = None
tasks = openml.tasks.list_tasks(task_type=task_type, output_format="dataframe")
if len(tasks) == 0:
return None
tasks = cast(pd.DataFrame, tasks).loc[tasks["did"] == dataset_id]
if len(tasks) == 0:
return None
tasks = tasks.loc[tasks["target_feature"] == target_name]
if len(tasks) == 0:
return None
task_match = []
for task_id in tasks["tid"].to_list():
task_match.append(task_id)
task = openml.tasks.get_task(task_id)
for k, v in kwargs.items():
if getattr(task, k) != v:
# if even one meta-data key mismatches, task_id is not a match
task_match.pop(-1)
break
# if task_id is still in task_match, it matched all meta-data key-value pairs
if len(task_match) == 1:
return_val = task_id
break
if len(task_match) == 0:
return_val = None
return return_val


try:
from sklearn.impute import SimpleImputer
except ImportError:
@@ -275,4 +328,4 @@ def cat(X):
return X.dtypes == "category"


__all__ = ["TestBase", "SimpleImputer", "CustomImputer", "cat", "cont"]
__all__ = ["TestBase", "SimpleImputer", "CustomImputer", "cat", "cont", "check_task_existence"]
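
The helper above matches any extra attributes passed as keyword arguments via getattr on the fetched task, which is exactly how the updated tests feed it their TASK_META_DATA entries. A minimal usage sketch, not part of this diff, assuming a client configured for the OpenML test server; the target name "class" for dataset 128 (iris) is an assumption:

```python
import openml
from openml.tasks import TaskType
from openml.testing import check_task_existence

# point the client at the test server (an API key must also be configured)
openml.config.server = "https://test.openml.org/api/v1/xml"

# look for a supervised classification task on dataset 128 (iris);
# extra attributes such as estimation_procedure_id are compared via getattr
task_id = check_task_existence(
    TaskType.SUPERVISED_CLASSIFICATION,
    dataset_id=128,
    target_name="class",  # assumed target feature name
    estimation_procedure_id=1,
)
if task_id is None:
    # no match found: create and publish a new task, as the updated tests do
    new_task = openml.tasks.create_task(
        task_type=TaskType.SUPERVISED_CLASSIFICATION,
        dataset_id=128,
        target_name="class",
        estimation_procedure_id=1,
    ).publish()
    task_id = new_task.task_id
```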
47 changes: 0 additions & 47 deletions openml/utils.py
@@ -3,7 +3,6 @@
import os
import xmltodict
import shutil
import typing
from typing import TYPE_CHECKING, List, Tuple, Union, Type
import warnings
import pandas as pd
@@ -33,52 +32,6 @@
pass


def check_task_existence(task_meta_data: dict) -> Union[int, None]:
"""Checks if any task with exists on test server that matches the meta data.

Parameters
----------
task_meta_data : dict
A dictionary containing meta-information on the task fetched from the test server.

Returns
-------
int, None
"""
return_val = None
try:
tasks = openml.tasks.list_tasks(output_format="dataframe")
tasks = typing.cast(pd.DataFrame, tasks).loc[
tasks["task_type"] == task_meta_data["task_type"]
]
if len(tasks) == 0:
return None
tasks = tasks.loc[tasks.did == task_meta_data["dataset_id"]]
if len(tasks) == 0:
return None
tasks = tasks.loc[tasks.target_feature == task_meta_data["target_name"]]
if len(tasks) == 0:
return None
task_match = []
for task_id in tasks.tid.values:
task_match.append(task_id)
task = openml.tasks.get_task(task_id)
for k, v in task_meta_data.items():
if getattr(task, k) != v:
# if even one meta-data key mismatches, task_id is not a match
task_match.pop(-1)
break
# if task_id is still in task_match, it matched all meta-data key-value pairs
if len(task_match) == 1:
return_val = task_id
break
if len(task_match) == 0:
return_val = None
except openml.exceptions.OpenMLServerException:
return_val = None
return return_val


def extract_xml_tags(xml_tag_name, node, allow_none=True):
"""Helper to extract xml tags from xmltodict.

2 changes: 1 addition & 1 deletion tests/test_datasets/test_dataset_functions.py
@@ -1351,7 +1351,7 @@ def test_data_edit_errors(self):
"original_data_url, default_target_attribute, row_id_attribute, "
"ignore_attribute or paper_url to edit.",
edit_dataset,
data_id=64,
data_id=64, # blood-transfusion-service-center
)
# Check server exception when unknown dataset is provided
self.assertRaisesRegex(
55 changes: 40 additions & 15 deletions tests/test_runs/test_run_functions.py
@@ -7,6 +7,7 @@
import random
import time
import sys
import ast
import unittest.mock

import numpy as np
@@ -24,7 +25,8 @@
from openml.runs.functions import _run_task_get_arffcontent, run_exists, format_prediction
from openml.runs.trace import OpenMLRunTrace
from openml.tasks import TaskType
from openml.utils import check_task_existence
from openml.testing import check_task_existence
from openml.exceptions import OpenMLServerException

from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection._search import BaseSearchCV
@@ -60,13 +62,13 @@ class TestRun(TestBase):
# unit tests to pass by uploading a similar task at runtime
TASK_META_DATA = {
1605: {
"task_type": "Supervised Regression",
"dataset_id": 123,
"task_type": TaskType.SUPERVISED_REGRESSION,
"dataset_id": 123, # quake
"estimation_procedure_id": 7,
"target_name": "richter",
},
1481: {
"task_type": "Supervised Classification",
"task_type": TaskType.SUPERVISED_CLASSIFICATION,
"dataset_id": 128, # iris
"estimation_procedure_id": 1,
"class_labels": ["Iris-setosa", "Iris-versicolor", "Iris-virginica"],
@@ -517,7 +519,7 @@ def _run_and_upload_classification(
def _run_and_upload_regression(
self, clf, task_id, n_missing_vals, n_test_obs, flow_expected_rsv, sentinel=None
):
num_folds = 10 # because of holdout
num_folds = 10 # because of cross-validation
num_iterations = 5 # for base search algorithms
metric = sklearn.metrics.mean_absolute_error # metric class
metric_name = "mean_absolute_error" # openml metric name
@@ -549,15 +551,23 @@ def test_run_and_upload_linear_regression(self):
task_id = self.TEST_SERVER_TASK_REGRESSION[0]

task_meta_data = self.TASK_META_DATA[task_id]
_task_id = check_task_existence(task_meta_data)
_task_id = check_task_existence(**task_meta_data)
if _task_id is not None:
task_id = _task_id
else:
task_meta_data["task_type"] = TaskType.SUPERVISED_REGRESSION
new_task = openml.tasks.create_task(**task_meta_data)
# publishes the new task
new_task = new_task.publish()
task_id = new_task.task_id
try:
new_task = new_task.publish()
task_id = new_task.task_id
except OpenMLServerException as e:
if e.code == 614: # Task already exists
# the exception message contains the task_id that was matched in the format
# 'Task already exists. - matched id(s): [xxxx]'
task_id = ast.literal_eval(e.message.split("matched id(s):")[-1].strip())[0]
else:
raise Exception(repr(e))
# mark to remove the uploaded task
TestBase._mark_entity_for_removal("task", task_id)
TestBase.logger.info("collected from test_run_functions: {}".format(task_id))
@@ -966,15 +976,23 @@ def test_initialize_model_from_run(self):

task_id = 1481 # this task may be deleted during test server maintenance
task_meta_data = self.TASK_META_DATA[task_id]
_task_id = check_task_existence(task_meta_data)
_task_id = check_task_existence(**task_meta_data)
if _task_id is not None:
task_id = _task_id
else:
task_meta_data["task_type"] = TaskType.SUPERVISED_CLASSIFICATION
new_task = openml.tasks.create_task(**task_meta_data)
# publishes the new task
new_task = new_task.publish()
task_id = new_task.task_id
try:
new_task = new_task.publish()
task_id = new_task.task_id
except OpenMLServerException as e:
if e.code == 614: # Task already exists
# the exception message contains the task_id that was matched in the format
# 'Task already exists. - matched id(s): [xxxx]'
task_id = ast.literal_eval(e.message.split("matched id(s):")[-1].strip())[0]
else:
raise Exception(repr(e))
# mark to remove the uploaded task
TestBase._mark_entity_for_removal("task", task_id)
TestBase.logger.info("collected from test_run_functions: {}".format(task_id))
@@ -1514,15 +1532,23 @@ def test_format_prediction_task_regression(self):
task_id = self.TEST_SERVER_TASK_REGRESSION[0]

task_meta_data = self.TASK_META_DATA[task_id]
_task_id = check_task_existence(task_meta_data)
_task_id = check_task_existence(**task_meta_data)
if _task_id is not None:
task_id = _task_id
else:
task_meta_data["task_type"] = TaskType.SUPERVISED_REGRESSION
new_task = openml.tasks.create_task(**task_meta_data)
# publishes the new task
new_task = new_task.publish()
task_id = new_task.task_id
try:
new_task = new_task.publish()
task_id = new_task.task_id
except OpenMLServerException as e:
if e.code == 614: # Task already exists
# the exception message contains the task_id that was matched in the format
# 'Task already exists. - matched id(s): [xxxx]'
task_id = ast.literal_eval(e.message.split("matched id(s):")[-1].strip())[0]
else:
raise Exception(repr(e))
# mark to remove the uploaded task
TestBase._mark_entity_for_removal("task", task_id)
TestBase.logger.info("collected from test_run_functions: {}".format(task_id))
@@ -1531,4 +1557,3 @@ def test_format_prediction_task_regression(self):
ignored_input = [0] * 5
res = format_prediction(regression, *ignored_input)
self.assertListEqual(res, [0] * 5)
self.assertListEqual(res, [0] * 5)
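
Each call site wraps publish() in the same fallback: when the server responds with error code 614, the id of the already-existing task is parsed out of the exception message instead of failing the test. A standalone sketch of just that parsing step; the helper name is hypothetical, and the message format is the one quoted in the diff comments:

```python
import ast

def task_id_from_614_message(message: str) -> int:
    # hypothetical helper, not part of this diff; expected message format:
    # 'Task already exists. - matched id(s): [1593]'
    # everything after the marker is a Python list literal, e.g. '[1593]'
    matched_ids = ast.literal_eval(message.split("matched id(s):")[-1].strip())
    return matched_ids[0]

assert task_id_from_614_message("Task already exists. - matched id(s): [1593]") == 1593
```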
22 changes: 16 additions & 6 deletions tests/test_tasks/test_regression_task.py
@@ -1,11 +1,13 @@
# License: BSD 3-Clause

import ast
import numpy as np

import openml
from openml.tasks import TaskType
from openml.testing import TestBase
from openml.utils import check_task_existence
from openml.testing import check_task_existence
from openml.exceptions import OpenMLServerException
from .test_supervised_task import OpenMLSupervisedTaskTest


@@ -17,20 +19,28 @@ def setUp(self, n_levels: int = 1):
super(OpenMLRegressionTaskTest, self).setUp()

task_meta_data = {
"task_type": "Supervised Regression",
"dataset_id": 105,
"task_type": TaskType.SUPERVISED_REGRESSION,
"dataset_id": 105, # wisconsin
"estimation_procedure_id": 7,
"target_name": "time",
}
_task_id = check_task_existence(task_meta_data)
_task_id = check_task_existence(**task_meta_data)
if _task_id is not None:
task_id = _task_id
else:
task_meta_data["task_type"] = TaskType.SUPERVISED_REGRESSION
new_task = openml.tasks.create_task(**task_meta_data)
# publishes the new task
new_task = new_task.publish()
task_id = new_task.task_id
try:
new_task = new_task.publish()
task_id = new_task.task_id
except OpenMLServerException as e:
if e.code == 614: # Task already exists
# the exception message contains the task_id that was matched in the format
# 'Task already exists. - matched id(s): [xxxx]'
task_id = ast.literal_eval(e.message.split("matched id(s):")[-1].strip())[0]
else:
raise Exception(repr(e))
# mark to remove the uploaded task
TestBase._mark_entity_for_removal("task", task_id)
TestBase.logger.info("collected from test_run_functions: {}".format(task_id))