Skip to content

Commit 8cc6429

Browse files
satvshr, PGijsbers, and geetu040
authored
[MNT] Dockerized tests for CI runs using localhost (openml#1629)
#### Metadata * Reference Issue: fixes openml#1614, stacks on openml#1630 * New Tests Added: No * Documentation Updated: No #### Details * What does this PR implement/fix? Explain your changes. This PR implements the setting up of the v1 and v2 test servers in CI using docker via `localhost`. --------- Co-authored-by: PGijsbers <p.gijsbers@tue.nl> Co-authored-by: Armaghan Shakir <raoarmaghanshakir040@gmail.com>
1 parent e95675a commit 8cc6429

File tree

10 files changed

+271
-19
lines changed

10 files changed

+271
-19
lines changed

.github/workflows/test.yml

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -101,22 +101,40 @@ jobs:
101101
echo "BEFORE=$git_status" >> $GITHUB_ENV
102102
echo "Repository status before tests: $git_status"
103103
104+
- name: Clone Services
105+
if: matrix.os == 'ubuntu-latest'
106+
run: |
107+
git clone --depth 1 https://github.com/openml/services.git
108+
109+
- name: Start Docker Services
110+
if: matrix.os == 'ubuntu-latest'
111+
working-directory: ./services
112+
run: |
113+
docker compose --profile rest-api --profile minio up -d
114+
115+
echo "Waiting for PHP API to boot..."
116+
timeout 60s bash -c 'until [ "$(docker inspect -f {{.State.Health.Status}} openml-php-rest-api)" == "healthy" ]; do sleep 5; done'
117+
118+
echo "Final Verification: Gateway Connectivity..."
119+
curl -sSfL http://localhost:8000/api/v1/xml/data/1 | head -n 15
120+
104121
- name: Show installed dependencies
105122
run: python -m pip list
106123

107124
- name: Run tests on Ubuntu Test
108125
if: matrix.os == 'ubuntu-latest'
109126
env:
110127
OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }}
128+
OPENML_USE_LOCAL_SERVICES: "true"
111129
run: |
112130
if [ "${{ matrix.code-cov }}" = "true" ]; then
113131
codecov="--cov=openml --long --cov-report=xml"
114132
fi
115133
116134
if [ "${{ matrix.sklearn-only }}" = "true" ]; then
117-
marks="sklearn and not production_server and not test_server"
135+
marks="sklearn and not production_server"
118136
else
119-
marks="not production_server and not test_server"
137+
marks="not production_server"
120138
fi
121139
122140
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
@@ -125,15 +143,16 @@ jobs:
125143
if: matrix.os == 'ubuntu-latest'
126144
env:
127145
OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }}
146+
OPENML_USE_LOCAL_SERVICES: "true"
128147
run: |
129148
if [ "${{ matrix.code-cov }}" = "true" ]; then
130149
codecov="--cov=openml --long --cov-report=xml"
131150
fi
132151
133152
if [ "${{ matrix.sklearn-only }}" = "true" ]; then
134-
marks="sklearn and production_server and not test_server"
153+
marks="sklearn and production_server"
135154
else
136-
marks="production_server and not test_server"
155+
marks="production_server"
137156
fi
138157
139158
pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
@@ -145,6 +164,20 @@ jobs:
145164
run: | # we need a separate step because of the bash-specific if-statement in the previous one.
146165
pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server"
147166
167+
- name: Upload coverage
168+
if: matrix.code-cov && always()
169+
uses: codecov/codecov-action@v4
170+
with:
171+
files: coverage.xml
172+
token: ${{ secrets.CODECOV_TOKEN }}
173+
fail_ci_if_error: true
174+
verbose: true
175+
176+
- name: Cleanup Docker setup
177+
if: matrix.os == 'ubuntu-latest' && always()
178+
run: |
179+
sudo rm -rf services
180+
148181
- name: Check for files left behind by test
149182
if: matrix.os != 'windows-latest' && always()
150183
run: |
@@ -157,15 +190,6 @@ jobs:
157190
exit 1
158191
fi
159192
160-
- name: Upload coverage
161-
if: matrix.code-cov && always()
162-
uses: codecov/codecov-action@v4
163-
with:
164-
files: coverage.xml
165-
token: ${{ secrets.CODECOV_TOKEN }}
166-
fail_ci_if_error: true
167-
verbose: true
168-
169193
dummy_windows_py_sk024:
170194
name: (windows-latest, Py, sk0.24.*, sk-only:false)
171195
runs-on: ubuntu-latest

tests/conftest.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,8 @@ def as_robot() -> Iterator[None]:
272272

273273
@pytest.fixture(autouse=True)
274274
def with_server(request):
275+
if os.getenv("OPENML_USE_LOCAL_SERVICES") == "true":
276+
openml.config.TEST_SERVER_URL = "http://localhost:8000"
275277
if "production_server" in request.keywords:
276278
openml.config.server = "https://www.openml.org/api/v1/xml"
277279
openml.config.apikey = None
@@ -306,4 +308,4 @@ def workdir(tmp_path):
306308
original_cwd = Path.cwd()
307309
os.chdir(tmp_path)
308310
yield tmp_path
309-
os.chdir(original_cwd)
311+
os.chdir(original_cwd)

tests/test_datasets/test_dataset_functions.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,10 @@ def test_deletion_of_cache_dir_faulty_download(self, patch):
530530
datasets_cache_dir = os.path.join(openml.config.get_cache_directory(), "datasets")
531531
assert len(os.listdir(datasets_cache_dir)) == 0
532532

533+
@pytest.mark.skipif(
534+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
535+
reason="Pending resolution of #1657",
536+
)
533537
@pytest.mark.test_server()
534538
def test_publish_dataset(self):
535539
arff_file_path = self.static_cache_dir / "org" / "openml" / "test" / "datasets" / "2" / "dataset.arff"
@@ -566,6 +570,10 @@ def test__retrieve_class_labels(self):
566570
labels = custom_ds.retrieve_class_labels(target_name=custom_ds.features[31].name)
567571
assert labels == ["COIL", "SHEET"]
568572

573+
@pytest.mark.skipif(
574+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
575+
reason="Pending resolution of #1657",
576+
)
569577
@pytest.mark.test_server()
570578
def test_upload_dataset_with_url(self):
571579
dataset = OpenMLDataset(
@@ -689,6 +697,10 @@ def test_attributes_arff_from_df_unknown_dtype(self):
689697
with pytest.raises(ValueError, match=err_msg):
690698
attributes_arff_from_df(df)
691699

700+
@pytest.mark.skipif(
701+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
702+
reason="Pending resolution of #1657",
703+
)
692704
@pytest.mark.test_server()
693705
def test_create_dataset_numpy(self):
694706
data = np.array([[1, 2, 3], [1.2, 2.5, 3.8], [2, 5, 8], [0, 1, 0]]).T
@@ -723,6 +735,10 @@ def test_create_dataset_numpy(self):
723735
), "Uploaded arff does not match original one"
724736
assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset"
725737

738+
@pytest.mark.skipif(
739+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
740+
reason="Pending resolution of #1657",
741+
)
726742
@pytest.mark.test_server()
727743
def test_create_dataset_list(self):
728744
data = [
@@ -778,6 +794,10 @@ def test_create_dataset_list(self):
778794
), "Uploaded ARFF does not match original one"
779795
assert _get_online_dataset_format(dataset.id) == "arff", "Wrong format for dataset"
780796

797+
@pytest.mark.skipif(
798+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
799+
reason="Pending resolution of #1657",
800+
)
781801
@pytest.mark.test_server()
782802
def test_create_dataset_sparse(self):
783803
# test the scipy.sparse.coo_matrix
@@ -926,6 +946,10 @@ def test_get_online_dataset_format(self):
926946
dataset_id
927947
), "The format of the ARFF files is different"
928948

949+
@pytest.mark.skipif(
950+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
951+
reason="Pending resolution of #1657",
952+
)
929953
@pytest.mark.test_server()
930954
def test_create_dataset_pandas(self):
931955
data = [
@@ -1151,6 +1175,10 @@ def test_ignore_attributes_dataset(self):
11511175
paper_url=paper_url,
11521176
)
11531177

1178+
@pytest.mark.skipif(
1179+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
1180+
reason="Pending resolution of #1657",
1181+
)
11541182
@pytest.mark.test_server()
11551183
def test_publish_fetch_ignore_attribute(self):
11561184
"""Test to upload and retrieve dataset and check ignore_attributes"""
@@ -1270,6 +1298,10 @@ def test_create_dataset_row_id_attribute_error(self):
12701298
paper_url=paper_url,
12711299
)
12721300

1301+
@pytest.mark.skipif(
1302+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
1303+
reason="Pending resolution of #1657",
1304+
)
12731305
@pytest.mark.test_server()
12741306
def test_create_dataset_row_id_attribute_inference(self):
12751307
# meta-information
@@ -1438,6 +1470,10 @@ def test_data_edit_non_critical_field(self):
14381470
edited_dataset = openml.datasets.get_dataset(did)
14391471
assert edited_dataset.description == desc
14401472

1473+
@pytest.mark.skipif(
1474+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
1475+
reason="Pending resolution of #1657",
1476+
)
14411477
@pytest.mark.test_server()
14421478
def test_data_edit_critical_field(self):
14431479
# Case 2
@@ -1490,6 +1526,10 @@ def test_data_edit_requires_valid_dataset(self):
14901526
description="xor operation dataset",
14911527
)
14921528

1529+
@pytest.mark.skipif(
1530+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
1531+
reason="Pending resolution of #1657",
1532+
)
14931533
@pytest.mark.test_server()
14941534
def test_data_edit_cannot_edit_critical_field_if_dataset_has_task(self):
14951535
# Need to own a dataset to be able to edit meta-data
@@ -2008,4 +2048,4 @@ def test_get_dataset_parquet(requests_mock, test_files_directory):
20082048
assert dataset._parquet_url is not None
20092049
assert dataset.parquet_file is not None
20102050
assert os.path.isfile(dataset.parquet_file)
2011-
assert dataset.data_file is None # is alias for arff path
2051+
assert dataset.data_file is None # is alias for arff path

tests/test_flows/test_flow.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import copy
66
import hashlib
77
import re
8+
import os
89
import time
910
from packaging.version import Version
1011
from unittest import mock
@@ -33,7 +34,6 @@
3334
from openml.testing import SimpleImputer, TestBase
3435

3536

36-
3737
class TestFlow(TestBase):
3838
_multiprocess_can_split_ = True
3939

@@ -180,6 +180,10 @@ def test_to_xml_from_xml(self):
180180
openml.flows.functions.assert_flows_equal(new_flow, flow)
181181
assert new_flow is not flow
182182

183+
@pytest.mark.skipif(
184+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
185+
reason="Pending resolution of #1657",
186+
)
183187
@pytest.mark.sklearn()
184188
@pytest.mark.test_server()
185189
def test_publish_flow(self):
@@ -222,6 +226,10 @@ def test_publish_existing_flow(self, flow_exists_mock):
222226
f"collected from {__file__.split('/')[-1]}: {flow.flow_id}",
223227
)
224228

229+
@pytest.mark.skipif(
230+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
231+
reason="Pending resolution of #1657",
232+
)
225233
@pytest.mark.sklearn()
226234
@pytest.mark.test_server()
227235
def test_publish_flow_with_similar_components(self):
@@ -273,6 +281,10 @@ def test_publish_flow_with_similar_components(self):
273281
TestBase._mark_entity_for_removal("flow", flow3.flow_id, flow3.name)
274282
TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}")
275283

284+
@pytest.mark.skipif(
285+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
286+
reason="Pending resolution of #1657",
287+
)
276288
@pytest.mark.sklearn()
277289
@pytest.mark.test_server()
278290
def test_semi_legal_flow(self):
@@ -383,6 +395,10 @@ def get_sentinel():
383395
flow_id = openml.flows.flow_exists(name, version)
384396
assert not flow_id
385397

398+
@pytest.mark.skipif(
399+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
400+
reason="Pending resolution of #1657",
401+
)
386402
@pytest.mark.sklearn()
387403
@pytest.mark.test_server()
388404
def test_existing_flow_exists(self):
@@ -424,6 +440,10 @@ def test_existing_flow_exists(self):
424440
)
425441
assert downloaded_flow_id == flow.flow_id
426442

443+
@pytest.mark.skipif(
444+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
445+
reason="Pending resolution of #1657",
446+
)
427447
@pytest.mark.sklearn()
428448
@pytest.mark.test_server()
429449
def test_sklearn_to_upload_to_flow(self):

tests/test_flows/test_flow_functions.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from unittest import mock
1313
from unittest.mock import patch
1414

15+
import os
1516
import pandas as pd
1617
import pytest
1718
import requests
@@ -309,6 +310,10 @@ def test_get_flow1(self):
309310
flow = openml.flows.get_flow(1)
310311
assert flow.external_version is None
311312

313+
@pytest.mark.skipif(
314+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
315+
reason="Pending resolution of #1657",
316+
)
312317
@pytest.mark.sklearn()
313318
@pytest.mark.test_server()
314319
def test_get_flow_reinstantiate_model(self):
@@ -392,6 +397,10 @@ def test_get_flow_reinstantiate_flow_not_strict_pre_023(self):
392397
assert flow.flow_id is None
393398
assert "sklearn==0.19.1" not in flow.dependencies
394399

400+
@pytest.mark.skipif(
401+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
402+
reason="Pending resolution of #1657",
403+
)
395404
@pytest.mark.sklearn()
396405
@pytest.mark.test_server()
397406
def test_get_flow_id(self):

tests/test_openml/test_api_calls.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import minio
99
import pytest
10+
import os
1011

1112
import openml
1213
from openml.config import ConfigurationForExamples
@@ -20,6 +21,10 @@ def test_too_long_uri(self):
2021
with pytest.raises(openml.exceptions.OpenMLServerError, match="URI too long!"):
2122
openml.datasets.list_datasets(data_id=list(range(10000)))
2223

24+
@pytest.mark.skipif(
25+
os.getenv("OPENML_USE_LOCAL_SERVICES") == "true",
26+
reason="Pending resolution of #1657",
27+
)
2328
@unittest.mock.patch("time.sleep")
2429
@unittest.mock.patch("requests.Session")
2530
@pytest.mark.test_server()

0 commit comments

Comments
 (0)