Skip to content
Prev Previous commit
Next Next commit
add more tests for different input types for attribute validation
  • Loading branch information
madel0093 committed Oct 29, 2020
commit afdd949186a17a925b1ef3bf11b48546876cc429
55 changes: 53 additions & 2 deletions tests/test_datasets/test_dataset_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1419,9 +1419,11 @@ def test_data_fork(self):

@pytest.mark.parametrize(
"default_target_attribute,row_id_attribute,ignore_attribute",
[("wrong", None, None), (None, "wrong", None), (None, None, "wrong")],
[("wrong", None, None), (None, "wrong", None), (None, None, "wrong"),
("wrong,sunny", None, None),(None, None, "wrong,sunny"),
(["wrong","sunny"], None, None), (None, None, ["wrong","sunny"])],
)
def test_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute):
def test_invalid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute):
data = [
["a", "sunny", 85.0, 85.0, "FALSE", "no"],
["b", "sunny", 80.0, 90.0, "TRUE", "no"],
Expand Down Expand Up @@ -1464,3 +1466,52 @@ def test_attribute_validations(default_target_attribute, row_id_attribute, ignor
original_data_url=original_data_url,
paper_url=paper_url,
)

@pytest.mark.parametrize(
    "default_target_attribute,row_id_attribute,ignore_attribute",
    [
        # one existing column name, supplied to each argument in turn
        ("outlook", None, None),
        (None, "outlook", None),
        (None, None, "outlook"),
        # two existing column names packed into one comma-separated string
        ("outlook,windy", None, None),
        (None, None, "outlook,windy"),
        # two existing column names supplied as a list
        (["outlook", "windy"], None, None),
        (None, None, ["outlook", "windy"]),
    ],
)
def test_valid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute):
    """Call ``create_dataset`` with attribute arguments naming existing columns.

    Every parametrized value refers only to ``outlook`` and/or ``windy``, both
    of which are columns of the DataFrame built below.  The test makes no
    explicit assertion: it passes as long as ``create_dataset`` returns without
    raising for the given combination of arguments.
    """
    rows = [
        ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
        ["b", "sunny", 80.0, 90.0, "TRUE", "no"],
        ["c", "overcast", 83.0, 86.0, "FALSE", "yes"],
        ["d", "rainy", 70.0, 96.0, "FALSE", "yes"],
        ["e", "rainy", 68.0, 80.0, "FALSE", "yes"],
    ]
    frame = pd.DataFrame(
        rows,
        columns=["rnd_str", "outlook", "temperature", "humidity", "windy", "play"],
    )
    # Pin the dtype of the non-numeric feature columns in a single cast.
    frame = frame.astype({"outlook": "category", "windy": "bool", "play": "category"})

    # The returned dataset object is not inspected; only a clean return matters.
    openml.datasets.functions.create_dataset(
        name="pandas_testing_dataset",
        description="Synthetic dataset created from a Pandas DataFrame",
        creator="OpenML tester",
        contributor=None,
        collection_date="01-01-2018",
        language="English",
        licence="MIT",
        default_target_attribute=default_target_attribute,
        row_id_attribute=row_id_attribute,
        ignore_attribute=ignore_attribute,
        citation="None",
        attributes="auto",
        data=frame,
        version_label="test",
        original_data_url="http://openml.github.io/openml-python",
        paper_url="http://openml.github.io/openml-python",
    )