Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
fb9f9eb
Minor Documentation Fixes: TaskID for Example Custom Flow; Comment on…
LennartPurucker Apr 18, 2023
f9412d3
[pre-commit.ci] pre-commit autoupdate (#1223)
pre-commit-ci[bot] Apr 25, 2023
f43e075
[pre-commit.ci] pre-commit autoupdate (#1250)
pre-commit-ci[bot] Jun 11, 2023
a4ec4bc
change from raise error to warning for bad tasks (#1244)
LennartPurucker Jun 12, 2023
3f59841
Update version number and citation request (#1253)
mfeurer Jun 12, 2023
333b068
Added warning to run_model_on_task to avoid duplicates if no authenti…
v-parmar Jun 13, 2023
a7f2639
Fix 1124: provide clear naming for cache directories (#1254)
mfeurer Jun 13, 2023
91b4bf0
Download updates (#1256)
LennartPurucker Jun 15, 2023
3b3553b
Revert "Download updates (#1256)"
LennartPurucker Jun 15, 2023
4acdac4
Merge pull request #1259 from openml/revert-1256-download_updates
LennartPurucker Jun 15, 2023
32c2902
ADD: Rework Download Options and enable Lazy Loading for Datasets (#1…
LennartPurucker Jun 15, 2023
80a028a
Add mypy annotations for _api_calls.py (#1257)
mfeurer Jun 16, 2023
495162d
Deprecate `output_format='dict'` (#1258)
PGijsbers Jun 16, 2023
8418915
Make test more robust to server state, avoid attaching attached runs (#1…
PGijsbers Jun 16, 2023
a186012
[pre-commit.ci] pre-commit autoupdate (#1264)
pre-commit-ci[bot] Jun 29, 2023
abf9506
Add future warning dataset format (#1265)
PGijsbers Jul 3, 2023
d940e0e
Prepare release 0.14 (#1262)
mfeurer Jul 3, 2023
2079501
scipy 1.11 sets scipy.stats.mode `keepdims=False` as default (#1267)
PGijsbers Jul 3, 2023
5d2128a
Update test.yml: upload CODECOV token (#1268)
mfeurer Jul 4, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Minor Documentation Fixes: TaskID for Example Custom Flow; Comment on…
… Homepage; More documentation for `components` (#1243)

* fix task ID for Iris task

* update comment on homepage

* added additional documentation specific to the `components` parameter.

* add change to progress.rst

* Fix dataframe append being deprecated by replacing it with (backwards-compatible) pd.concat

* fix logging example and add new changes to progress.rst

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix comment too long

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
LennartPurucker and pre-commit-ci[bot] authored Apr 18, 2023
commit fb9f9eb9ff8988f7b183dc705e5f99ffe03f4285
2 changes: 1 addition & 1 deletion doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Example
('estimator', tree.DecisionTreeClassifier())
]
)
# Download the OpenML task for the german credit card dataset with 10-fold
# Download the OpenML task for the pendigits dataset with 10-fold
# cross-validation.
task = openml.tasks.get_task(32)
# Run the scikit-learn model on the task.
Expand Down
1 change: 1 addition & 0 deletions doc/progress.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Changelog
0.13.1
~~~~~~

* DOC #1241 #1229 #1231: Minor documentation fixes and resolve documentation examples not working.
* ADD #1028: Add functions to delete runs, flows, datasets, and tasks (e.g., ``openml.datasets.delete_dataset``).
* ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server.
* ADD #1180: Improve the error message when the checksum of a downloaded dataset does not match the checksum provided by the API.
Expand Down
4 changes: 2 additions & 2 deletions examples/30_extended/configure_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@

import logging

openml.config.console_log.setLevel(logging.DEBUG)
openml.config.file_log.setLevel(logging.WARNING)
openml.config.set_console_log_level(logging.DEBUG)
openml.config.set_file_log_level(logging.WARNING)
openml.datasets.get_dataset("iris")

# Now the log level that was previously written to file should also be shown in the console.
Expand Down
6 changes: 5 additions & 1 deletion examples/30_extended/custom_flow_.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@
# you can use the Random Forest Classifier flow as a *subflow*. It allows for
# all hyperparameters of the Random Forest Classifier flow to also be specified in your pipeline flow.
#
# Note: you can currently only specify one subflow as part of the components.
#
# In this example, the auto-sklearn flow is a subflow: the auto-sklearn flow is entirely executed as part of this flow.
# This allows people to specify auto-sklearn hyperparameters used in this flow.
# In general, using a subflow is not required.
Expand All @@ -87,6 +89,8 @@
autosklearn_flow = openml.flows.get_flow(9313) # auto-sklearn 0.5.1
subflow = dict(
components=OrderedDict(automl_tool=autosklearn_flow),
# If you do not want to reference a subflow, you can use the following:
# components=OrderedDict(),
)

####################################################################################################
Expand Down Expand Up @@ -124,7 +128,7 @@
OrderedDict([("oml:name", "time"), ("oml:value", 120), ("oml:component", flow_id)]),
]

task_id = 1965 # Iris Task
task_id = 1200 # Iris Task
task = openml.tasks.get_task(task_id)
dataset_id = task.get_dataset().dataset_id

Expand Down
2 changes: 1 addition & 1 deletion openml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def _list_all(listing_call, output_format="dict", *args, **filters):
if len(result) == 0:
result = new_batch
else:
result = result.append(new_batch, ignore_index=True)
result = pd.concat([result, new_batch], ignore_index=True)
else:
# For output_format = 'dict' or 'object'
result.update(new_batch)
Expand Down
17 changes: 17 additions & 0 deletions tests/test_utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,23 @@ def mocked_perform_api_call(call, request_method):

def test_list_all(self):
openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks)
openml.utils._list_all(
listing_call=openml.tasks.functions._list_tasks, output_format="dataframe"
)

def test_list_all_with_multiple_batches(self):
res = openml.utils._list_all(
listing_call=openml.tasks.functions._list_tasks, output_format="dict", batch_size=2000
)
# Verify that test server state is still valid for this test to work as intended
# -> If the number of results is less than 2000, the test can not test the
# batching operation.
assert len(res) > 2000
openml.utils._list_all(
listing_call=openml.tasks.functions._list_tasks,
output_format="dataframe",
batch_size=2000,
)

@unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call)
def test_list_all_few_results_available(self, _perform_api_call):
Expand Down