From 1c6c5de6ea65606f8a537eb431c048656b0dc631 Mon Sep 17 00:00:00 2001
From: adel <m.adel0093@gmail.com>
Date: Wed, 28 Oct 2020 14:53:57 +0100
Subject: [PATCH 1/6] add validation for ignore_attribute and
 default_target_attribute in create_dataset

---
 openml/datasets/functions.py                  | 28 ++++++
 tests/test_datasets/test_dataset_functions.py | 95 ++++++++++++++++++-
 2 files changed, 119 insertions(+), 4 deletions(-)

diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 84943b244..816ca1a53 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -333,6 +333,28 @@ def _load_features_from_file(features_file: str) -> Dict:
         return xml_dict["oml:data_features"]
 
 
+def _expand_parameter(parameter):
+    expanded_parameter = []
+    if isinstance(parameter, str):
+        expanded_parameter = [x.strip() for x in parameter.split(",")]
+    elif isinstance(parameter, list):
+        expanded_parameter = parameter
+    return expanded_parameter
+
+
+def _validated_data_attributes(attributes, data_attributes, parameter_name):
+    if attributes is not None:
+        for attribute_ in attributes:
+            is_row_id_an_attribute = any([attr[0] == attribute_ for attr in data_attributes])
+            if not is_row_id_an_attribute:
+                raise ValueError(
+                    "all attribute of '{}' should be one of the data attribute. "
+                    " Got '{}' while candidates are {}.".format(
+                        parameter_name, attribute_, [attr[0] for attr in data_attributes]
+                    )
+                )
+
+
 def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
     """
     Check if the dataset ids provided are active.
@@ -636,6 +658,7 @@ def create_dataset(
     ignore_attribute : str | list
         Attributes that should be excluded in modelling,
         such as identifiers and indexes.
+        Can have multiple values, comma separated.
     citation : str
         Reference(s) that should be cited when building on this data.
     version_label : str, optional
@@ -687,6 +710,11 @@ def create_dataset(
                     attributes_[attr_idx] = (attr_name, attributes[attr_name])
     else:
         attributes_ = attributes
+    ignore_attributes = _expand_parameter(ignore_attribute)
+    _validated_data_attributes(ignore_attributes, attributes_, "ignore_attribute")
+
+    default_target_attributes = _expand_parameter(default_target_attribute)
+    _validated_data_attributes(default_target_attributes, attributes_, "default_target_attribute")
 
     if row_id_attribute is not None:
         is_row_id_an_attribute = any([attr[0] == row_id_attribute for attr in attributes_])
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index c6e6f78f8..ca72773a5 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -876,6 +876,94 @@ def test_get_online_dataset_format(self):
             "The format of the ARFF files is different",
         )
 
+    def test_create_dataset_default_target_attribute_validation(self):
+        data = [
+            ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
+            ["b", "sunny", 80.0, 90.0, "TRUE", "no"],
+            ["c", "overcast", 83.0, 86.0, "FALSE", "yes"],
+            ["d", "rainy", 70.0, 96.0, "FALSE", "yes"],
+            ["e", "rainy", 68.0, 80.0, "FALSE", "yes"],
+        ]
+        column_names = ["rnd_str", "outlook", "temperature", "humidity", "windy", "play"]
+        df = pd.DataFrame(data, columns=column_names)
+        # enforce the type of each column
+        df["outlook"] = df["outlook"].astype("category")
+        df["windy"] = df["windy"].astype("bool")
+        df["play"] = df["play"].astype("category")
+        # meta-information
+        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        description = "Synthetic dataset created from a Pandas DataFrame"
+        creator = "OpenML tester"
+        collection_date = "01-01-2018"
+        language = "English"
+        licence = "MIT"
+        citation = "None"
+        original_data_url = "http://openml.github.io/openml-python"
+        paper_url = "http://openml.github.io/openml-python"
+        with self.assertRaises(ValueError):
+            _ = openml.datasets.functions.create_dataset(
+                name=name,
+                description=description,
+                creator=creator,
+                contributor=None,
+                collection_date=collection_date,
+                language=language,
+                licence=licence,
+                default_target_attribute="wrong",
+                row_id_attribute=None,
+                ignore_attribute=None,
+                citation=citation,
+                attributes="auto",
+                data=df,
+                version_label="test",
+                original_data_url=original_data_url,
+                paper_url=paper_url,
+            )
+
+    def test_create_dataset_ignore_attribute_validation(self):
+        data = [
+            ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
+            ["b", "sunny", 80.0, 90.0, "TRUE", "no"],
+            ["c", "overcast", 83.0, 86.0, "FALSE", "yes"],
+            ["d", "rainy", 70.0, 96.0, "FALSE", "yes"],
+            ["e", "rainy", 68.0, 80.0, "FALSE", "yes"],
+        ]
+        column_names = ["rnd_str", "outlook", "temperature", "humidity", "windy", "play"]
+        df = pd.DataFrame(data, columns=column_names)
+        # enforce the type of each column
+        df["outlook"] = df["outlook"].astype("category")
+        df["windy"] = df["windy"].astype("bool")
+        df["play"] = df["play"].astype("category")
+        # meta-information
+        name = "%s-pandas_testing_dataset" % self._get_sentinel()
+        description = "Synthetic dataset created from a Pandas DataFrame"
+        creator = "OpenML tester"
+        collection_date = "01-01-2018"
+        language = "English"
+        licence = "MIT"
+        citation = "None"
+        original_data_url = "http://openml.github.io/openml-python"
+        paper_url = "http://openml.github.io/openml-python"
+        with self.assertRaises(ValueError):
+            _ = openml.datasets.functions.create_dataset(
+                name=name,
+                description=description,
+                creator=creator,
+                contributor=None,
+                collection_date=collection_date,
+                language=language,
+                licence=licence,
+                default_target_attribute="play",
+                row_id_attribute=None,
+                ignore_attribute=["rnd_str", "wrong"],
+                citation=citation,
+                attributes="auto",
+                data=df,
+                version_label="test",
+                original_data_url=original_data_url,
+                paper_url=paper_url,
+            )
+
     def test_create_dataset_pandas(self):
         data = [
             ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
@@ -897,7 +985,6 @@ def test_create_dataset_pandas(self):
         collection_date = "01-01-2018"
         language = "English"
         licence = "MIT"
-        default_target_attribute = "play"
         citation = "None"
         original_data_url = "http://openml.github.io/openml-python"
         paper_url = "http://openml.github.io/openml-python"
@@ -909,7 +996,7 @@ def test_create_dataset_pandas(self):
             collection_date=collection_date,
             language=language,
             licence=licence,
-            default_target_attribute=default_target_attribute,
+            default_target_attribute="play",
             row_id_attribute=None,
             ignore_attribute=None,
             citation=citation,
@@ -944,7 +1031,7 @@ def test_create_dataset_pandas(self):
             collection_date=collection_date,
             language=language,
             licence=licence,
-            default_target_attribute=default_target_attribute,
+            default_target_attribute="y",
             row_id_attribute=None,
             ignore_attribute=None,
             citation=citation,
@@ -980,7 +1067,7 @@ def test_create_dataset_pandas(self):
             collection_date=collection_date,
             language=language,
             licence=licence,
-            default_target_attribute=default_target_attribute,
+            default_target_attribute="rnd_str",
             row_id_attribute=None,
             ignore_attribute=None,
             citation=citation,

From 07c85cc58101e602bbb3180532d5694c2fd2b98e Mon Sep 17 00:00:00 2001
From: adel <m.adel0093@gmail.com>
Date: Wed, 28 Oct 2020 17:33:51 +0100
Subject: [PATCH 2/6] update naming conventions and add type hints; use
 pytest parametrize for attribute validation

---
 openml/datasets/functions.py                  | 21 ++++----
 tests/test_datasets/test_dataset_functions.py | 49 ++-----------------
 2 files changed, 13 insertions(+), 57 deletions(-)

diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 816ca1a53..a0cec2d66 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -333,7 +333,7 @@ def _load_features_from_file(features_file: str) -> Dict:
         return xml_dict["oml:data_features"]
 
 
-def _expand_parameter(parameter):
+def _expand_parameter(parameter: Union[str, list]):
     expanded_parameter = []
     if isinstance(parameter, str):
         expanded_parameter = [x.strip() for x in parameter.split(",")]
@@ -342,17 +342,16 @@ def _expand_parameter(parameter):
     return expanded_parameter
 
 
-def _validated_data_attributes(attributes, data_attributes, parameter_name):
-    if attributes is not None:
-        for attribute_ in attributes:
-            is_row_id_an_attribute = any([attr[0] == attribute_ for attr in data_attributes])
-            if not is_row_id_an_attribute:
-                raise ValueError(
-                    "all attribute of '{}' should be one of the data attribute. "
-                    " Got '{}' while candidates are {}.".format(
-                        parameter_name, attribute_, [attr[0] for attr in data_attributes]
-                    )
+def _validated_data_attributes(attributes: list, data_attributes: list, parameter_name: str):
+    for attribute_ in attributes:
+        is_attribute_a_data_attribute = any([attr[0] == attribute_ for attr in data_attributes])
+        if not is_attribute_a_data_attribute:
+            raise ValueError(
+                "all attribute of '{}' should be one of the data attribute. "
+                " Got '{}' while candidates are {}.".format(
+                    parameter_name, attribute_, [attr[0] for attr in data_attributes]
                 )
+            )
 
 
 def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index ca72773a5..5841288af 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -875,8 +875,9 @@ def test_get_online_dataset_format(self):
             _get_online_dataset_format(dataset_id),
             "The format of the ARFF files is different",
         )
-
-    def test_create_dataset_default_target_attribute_validation(self):
+    @pytest.mark.parametrize("default_target_attribute,row_id_attribute,ignore_attribute", 
+    [("wrong", None,None), (None,"wrong",None), (None,None,"wrong")])
+    def test_attribute_validations(self):
         data = [
             ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
             ["b", "sunny", 80.0, 90.0, "TRUE", "no"],
@@ -920,50 +921,6 @@ def test_create_dataset_default_target_attribute_validation(self):
                 paper_url=paper_url,
             )
 
-    def test_create_dataset_ignore_attribute_validation(self):
-        data = [
-            ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
-            ["b", "sunny", 80.0, 90.0, "TRUE", "no"],
-            ["c", "overcast", 83.0, 86.0, "FALSE", "yes"],
-            ["d", "rainy", 70.0, 96.0, "FALSE", "yes"],
-            ["e", "rainy", 68.0, 80.0, "FALSE", "yes"],
-        ]
-        column_names = ["rnd_str", "outlook", "temperature", "humidity", "windy", "play"]
-        df = pd.DataFrame(data, columns=column_names)
-        # enforce the type of each column
-        df["outlook"] = df["outlook"].astype("category")
-        df["windy"] = df["windy"].astype("bool")
-        df["play"] = df["play"].astype("category")
-        # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
-        description = "Synthetic dataset created from a Pandas DataFrame"
-        creator = "OpenML tester"
-        collection_date = "01-01-2018"
-        language = "English"
-        licence = "MIT"
-        citation = "None"
-        original_data_url = "http://openml.github.io/openml-python"
-        paper_url = "http://openml.github.io/openml-python"
-        with self.assertRaises(ValueError):
-            _ = openml.datasets.functions.create_dataset(
-                name=name,
-                description=description,
-                creator=creator,
-                contributor=None,
-                collection_date=collection_date,
-                language=language,
-                licence=licence,
-                default_target_attribute="play",
-                row_id_attribute=None,
-                ignore_attribute=["rnd_str", "wrong"],
-                citation=citation,
-                attributes="auto",
-                data=df,
-                version_label="test",
-                original_data_url=original_data_url,
-                paper_url=paper_url,
-            )
-
     def test_create_dataset_pandas(self):
         data = [
             ["a", "sunny", 85.0, 85.0, "FALSE", "no"],

From ff3d27aea03f8ef3cdf36198144678fe28d82c8c Mon Sep 17 00:00:00 2001
From: adel <m.adel0093@gmail.com>
Date: Wed, 28 Oct 2020 18:16:44 +0100
Subject: [PATCH 3/6] format long lines and update type hints for return
 values

---
 openml/datasets/functions.py                  | 6 ++++--
 tests/test_datasets/test_dataset_functions.py | 7 +++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index a0cec2d66..e8044aefb 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -333,7 +333,7 @@ def _load_features_from_file(features_file: str) -> Dict:
         return xml_dict["oml:data_features"]
 
 
-def _expand_parameter(parameter: Union[str, list]):
+def _expand_parameter(parameter: Union[str, List[str]]) -> List[str]:
     expanded_parameter = []
     if isinstance(parameter, str):
         expanded_parameter = [x.strip() for x in parameter.split(",")]
@@ -342,7 +342,9 @@ def _expand_parameter(parameter: Union[str, list]):
     return expanded_parameter
 
 
-def _validated_data_attributes(attributes: list, data_attributes: list, parameter_name: str):
+def _validated_data_attributes(
+    attributes: List[str], data_attributes: List[str], parameter_name: str
+) -> None:
     for attribute_ in attributes:
         is_attribute_a_data_attribute = any([attr[0] == attribute_ for attr in data_attributes])
         if not is_attribute_a_data_attribute:
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 5841288af..adfcec5b9 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -875,8 +875,11 @@ def test_get_online_dataset_format(self):
             _get_online_dataset_format(dataset_id),
             "The format of the ARFF files is different",
         )
-    @pytest.mark.parametrize("default_target_attribute,row_id_attribute,ignore_attribute", 
-    [("wrong", None,None), (None,"wrong",None), (None,None,"wrong")])
+
+    @pytest.mark.parametrize(
+        "default_target_attribute,row_id_attribute,ignore_attribute",
+        [("wrong", None, None), (None, "wrong", None), (None, None, "wrong")],
+    )
     def test_attribute_validations(self):
         data = [
             ["a", "sunny", 85.0, 85.0, "FALSE", "no"],

From ace60aa606ac7c4d8b1c811d02bf89fd192789a8 Mon Sep 17 00:00:00 2001
From: adel <m.adel0093@gmail.com>
Date: Thu, 29 Oct 2020 12:08:02 +0100
Subject: [PATCH 4/6] update test_attribute_validations to use
 pytest.mark.parametrize

---
 tests/test_datasets/test_dataset_functions.py | 97 ++++++++++---------
 1 file changed, 49 insertions(+), 48 deletions(-)

diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index adfcec5b9..0a237a84a 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -876,54 +876,6 @@ def test_get_online_dataset_format(self):
             "The format of the ARFF files is different",
         )
 
-    @pytest.mark.parametrize(
-        "default_target_attribute,row_id_attribute,ignore_attribute",
-        [("wrong", None, None), (None, "wrong", None), (None, None, "wrong")],
-    )
-    def test_attribute_validations(self):
-        data = [
-            ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
-            ["b", "sunny", 80.0, 90.0, "TRUE", "no"],
-            ["c", "overcast", 83.0, 86.0, "FALSE", "yes"],
-            ["d", "rainy", 70.0, 96.0, "FALSE", "yes"],
-            ["e", "rainy", 68.0, 80.0, "FALSE", "yes"],
-        ]
-        column_names = ["rnd_str", "outlook", "temperature", "humidity", "windy", "play"]
-        df = pd.DataFrame(data, columns=column_names)
-        # enforce the type of each column
-        df["outlook"] = df["outlook"].astype("category")
-        df["windy"] = df["windy"].astype("bool")
-        df["play"] = df["play"].astype("category")
-        # meta-information
-        name = "%s-pandas_testing_dataset" % self._get_sentinel()
-        description = "Synthetic dataset created from a Pandas DataFrame"
-        creator = "OpenML tester"
-        collection_date = "01-01-2018"
-        language = "English"
-        licence = "MIT"
-        citation = "None"
-        original_data_url = "http://openml.github.io/openml-python"
-        paper_url = "http://openml.github.io/openml-python"
-        with self.assertRaises(ValueError):
-            _ = openml.datasets.functions.create_dataset(
-                name=name,
-                description=description,
-                creator=creator,
-                contributor=None,
-                collection_date=collection_date,
-                language=language,
-                licence=licence,
-                default_target_attribute="wrong",
-                row_id_attribute=None,
-                ignore_attribute=None,
-                citation=citation,
-                attributes="auto",
-                data=df,
-                version_label="test",
-                original_data_url=original_data_url,
-                paper_url=paper_url,
-            )
-
     def test_create_dataset_pandas(self):
         data = [
             ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
@@ -1463,3 +1415,52 @@ def test_data_fork(self):
         self.assertRaisesRegex(
             OpenMLServerException, "Unknown dataset", fork_dataset, data_id=999999,
         )
+
+
+@pytest.mark.parametrize(
+    "default_target_attribute,row_id_attribute,ignore_attribute",
+    [("wrong", None, None), (None, "wrong", None), (None, None, "wrong")],
+)
+def test_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute):
+    data = [
+        ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
+        ["b", "sunny", 80.0, 90.0, "TRUE", "no"],
+        ["c", "overcast", 83.0, 86.0, "FALSE", "yes"],
+        ["d", "rainy", 70.0, 96.0, "FALSE", "yes"],
+        ["e", "rainy", 68.0, 80.0, "FALSE", "yes"],
+    ]
+    column_names = ["rnd_str", "outlook", "temperature", "humidity", "windy", "play"]
+    df = pd.DataFrame(data, columns=column_names)
+    # enforce the type of each column
+    df["outlook"] = df["outlook"].astype("category")
+    df["windy"] = df["windy"].astype("bool")
+    df["play"] = df["play"].astype("category")
+    # meta-information
+    name = "pandas_testing_dataset"
+    description = "Synthetic dataset created from a Pandas DataFrame"
+    creator = "OpenML tester"
+    collection_date = "01-01-2018"
+    language = "English"
+    licence = "MIT"
+    citation = "None"
+    original_data_url = "http://openml.github.io/openml-python"
+    paper_url = "http://openml.github.io/openml-python"
+    with pytest.raises(ValueError, match="should be one of the data attribute"):
+        _ = openml.datasets.functions.create_dataset(
+            name=name,
+            description=description,
+            creator=creator,
+            contributor=None,
+            collection_date=collection_date,
+            language=language,
+            licence=licence,
+            default_target_attribute=default_target_attribute,
+            row_id_attribute=row_id_attribute,
+            ignore_attribute=ignore_attribute,
+            citation=citation,
+            attributes="auto",
+            data=df,
+            version_label="test",
+            original_data_url=original_data_url,
+            paper_url=paper_url,
+        )

From afdd949186a17a925b1ef3bf11b48546876cc429 Mon Sep 17 00:00:00 2001
From: adel <m.adel0093@gmail.com>
Date: Thu, 29 Oct 2020 13:56:29 +0100
Subject: [PATCH 5/6] add more tests for different input types for attribute
 validation

---
 tests/test_datasets/test_dataset_functions.py | 55 ++++++++++++++++++-
 1 file changed, 53 insertions(+), 2 deletions(-)

diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 0a237a84a..54b916753 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -1419,9 +1419,11 @@ def test_data_fork(self):
 
 @pytest.mark.parametrize(
     "default_target_attribute,row_id_attribute,ignore_attribute",
-    [("wrong", None, None), (None, "wrong", None), (None, None, "wrong")],
+    [("wrong", None, None), (None, "wrong", None), (None, None, "wrong"),
+    ("wrong,sunny", None, None),(None, None, "wrong,sunny"),
+    (["wrong","sunny"], None, None), (None, None, ["wrong","sunny"])],
 )
-def test_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute):
+def test_invalid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute):
     data = [
         ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
         ["b", "sunny", 80.0, 90.0, "TRUE", "no"],
@@ -1464,3 +1466,52 @@ def test_attribute_validations(default_target_attribute, row_id_attribute, ignor
             original_data_url=original_data_url,
             paper_url=paper_url,
         )
+
+@pytest.mark.parametrize(
+    "default_target_attribute,row_id_attribute,ignore_attribute",
+    [("outlook", None, None), (None, "outlook", None), (None, None, "outlook"),
+    ("outlook,windy", None, None), (None, None, "outlook,windy"),
+    (["outlook","windy"], None, None), (None, None, ["outlook","windy"])],
+)
+def test_valid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute):
+    data = [
+        ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
+        ["b", "sunny", 80.0, 90.0, "TRUE", "no"],
+        ["c", "overcast", 83.0, 86.0, "FALSE", "yes"],
+        ["d", "rainy", 70.0, 96.0, "FALSE", "yes"],
+        ["e", "rainy", 68.0, 80.0, "FALSE", "yes"],
+    ]
+    column_names = ["rnd_str", "outlook", "temperature", "humidity", "windy", "play"]
+    df = pd.DataFrame(data, columns=column_names)
+    # enforce the type of each column
+    df["outlook"] = df["outlook"].astype("category")
+    df["windy"] = df["windy"].astype("bool")
+    df["play"] = df["play"].astype("category")
+    # meta-information
+    name = "pandas_testing_dataset"
+    description = "Synthetic dataset created from a Pandas DataFrame"
+    creator = "OpenML tester"
+    collection_date = "01-01-2018"
+    language = "English"
+    licence = "MIT"
+    citation = "None"
+    original_data_url = "http://openml.github.io/openml-python"
+    paper_url = "http://openml.github.io/openml-python"
+    _ = openml.datasets.functions.create_dataset(
+        name=name,
+        description=description,
+        creator=creator,
+        contributor=None,
+        collection_date=collection_date,
+        language=language,
+        licence=licence,
+        default_target_attribute=default_target_attribute,
+        row_id_attribute=row_id_attribute,
+        ignore_attribute=ignore_attribute,
+        citation=citation,
+        attributes="auto",
+        data=df,
+        version_label="test",
+        original_data_url=original_data_url,
+        paper_url=paper_url,
+    )

From 4cf640f8db6c4af2f1c49f8b8c129b11622375e7 Mon Sep 17 00:00:00 2001
From: adel <m.adel0093@gmail.com>
Date: Thu, 29 Oct 2020 13:57:11 +0100
Subject: [PATCH 6/6] update formatting

---
 tests/test_datasets/test_dataset_functions.py | 29 ++++++++++++++-----
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 54b916753..50f0e43b7 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -1419,11 +1419,19 @@ def test_data_fork(self):
 
 @pytest.mark.parametrize(
     "default_target_attribute,row_id_attribute,ignore_attribute",
-    [("wrong", None, None), (None, "wrong", None), (None, None, "wrong"),
-    ("wrong,sunny", None, None),(None, None, "wrong,sunny"),
-    (["wrong","sunny"], None, None), (None, None, ["wrong","sunny"])],
+    [
+        ("wrong", None, None),
+        (None, "wrong", None),
+        (None, None, "wrong"),
+        ("wrong,sunny", None, None),
+        (None, None, "wrong,sunny"),
+        (["wrong", "sunny"], None, None),
+        (None, None, ["wrong", "sunny"]),
+    ],
 )
-def test_invalid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute):
+def test_invalid_attribute_validations(
+    default_target_attribute, row_id_attribute, ignore_attribute
+):
     data = [
         ["a", "sunny", 85.0, 85.0, "FALSE", "no"],
         ["b", "sunny", 80.0, 90.0, "TRUE", "no"],
@@ -1467,11 +1475,18 @@ def test_invalid_attribute_validations(default_target_attribute, row_id_attribut
             paper_url=paper_url,
         )
 
+
 @pytest.mark.parametrize(
     "default_target_attribute,row_id_attribute,ignore_attribute",
-    [("outlook", None, None), (None, "outlook", None), (None, None, "outlook"),
-    ("outlook,windy", None, None), (None, None, "outlook,windy"),
-    (["outlook","windy"], None, None), (None, None, ["outlook","windy"])],
+    [
+        ("outlook", None, None),
+        (None, "outlook", None),
+        (None, None, "outlook"),
+        ("outlook,windy", None, None),
+        (None, None, "outlook,windy"),
+        (["outlook", "windy"], None, None),
+        (None, None, ["outlook", "windy"]),
+    ],
 )
 def test_valid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute):
     data = [