add more tests for different input types for attribute validation

openml · PGijsbers · Oct 29, 2020 · Oct 28, 2020 · Oct 28, 2020 · Oct 28, 2020
commit afdd949186a17a925b1ef3bf11b48546876cc429
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
@@ -1419,9 +1419,11 @@ def test_data_fork(self):
 
 @pytest.mark.parametrize(
     "default_target_attribute,row_id_attribute,ignore_attribute",
-    [("wrong", None, None), (None, "wrong", None), (None, None, "wrong")],
+    [("wrong", None, None), (None, "wrong", None), (None, None, "wrong"),
+    ("wrong,sunny", None, None),(None, None, "wrong,sunny"),
+    (["wrong","sunny"], None, None), (None, None, ["wrong","sunny"])],
 )
-def test_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute):
+def test_invalid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute):
     data = [
         ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
         ["b", "sunny", 80.0, 90.0, "TRUE", "no"],
@@ -1464,3 +1466,52 @@ def test_attribute_validations(default_target_attribute, row_id_attribute, ignor
             original_data_url=original_data_url,
             paper_url=paper_url,
         )
+
+@pytest.mark.parametrize(
+    "default_target_attribute,row_id_attribute,ignore_attribute",
+    [
+        ("outlook", None, None),
+        (None, "outlook", None),
+        (None, None, "outlook"),
+        ("outlook,windy", None, None),
+        (None, None, "outlook,windy"),
+        (["outlook", "windy"], None, None),
+        (None, None, ["outlook", "windy"]),
+    ],
+)
+def test_valid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute):
+    """Check that create_dataset accepts attribute arguments naming existing columns.
+
+    There are no assertions: the test passes when the call raises nothing.
+    """
+    weather = pd.DataFrame(
+        [
+            ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
+            ["b", "sunny", 80.0, 90.0, "TRUE", "no"],
+            ["c", "overcast", 83.0, 86.0, "FALSE", "yes"],
+            ["d", "rainy", 70.0, 96.0, "FALSE", "yes"],
+            ["e", "rainy", 68.0, 80.0, "FALSE", "yes"],
+        ],
+        columns=["rnd_str", "outlook", "temperature", "humidity", "windy", "play"],
+    )
+    # Enforce the intended dtype of each categorical / boolean column, in order.
+    for column, dtype in (("outlook", "category"), ("windy", "bool"), ("play", "category")):
+        weather[column] = weather[column].astype(dtype)
+    openml.datasets.functions.create_dataset(
+        name="pandas_testing_dataset",
+        description="Synthetic dataset created from a Pandas DataFrame",
+        creator="OpenML tester",
+        contributor=None,
+        collection_date="01-01-2018",
+        language="English",
+        licence="MIT",
+        default_target_attribute=default_target_attribute,
+        row_id_attribute=row_id_attribute,
+        ignore_attribute=ignore_attribute,
+        citation="None",
+        attributes="auto",
+        data=weather,
+        version_label="test",
+        original_data_url="http://openml.github.io/openml-python",
+        paper_url="http://openml.github.io/openml-python",
+    )