4343 OpenMLNotAuthorizedError ,
4444 OpenMLPrivateDatasetError ,
4545 OpenMLServerException ,
46+ OpenMLServerNoResult ,
4647)
4748from openml .tasks import TaskType , create_task
4849from openml .testing import TestBase , create_request_response
@@ -274,9 +275,7 @@ def test_get_dataset_cannot_access_private_data(self):
@pytest.mark.skip("Need to find dataset name of private dataset")
def test_dataset_by_name_cannot_access_private_data(self):
    """Fetching a private dataset by name must raise ``OpenMLPrivateDatasetError``.

    Skipped for now: the placeholder name below has to be replaced with the
    name of an actual private dataset before the test can run.
    """
    # The lookup is made against the production server, not the test server.
    openml.config.server = self.production_server
    with self.assertRaises(OpenMLPrivateDatasetError):
        openml.datasets.get_dataset("NAME_GOES_HERE")
280279
281280 def test_get_dataset_lazy_all_functions (self ):
282281 """Test that all expected functionality is available without downloading the dataset."""
@@ -285,9 +284,7 @@ def test_get_dataset_lazy_all_functions(self):
285284
286285 def ensure_absence_of_real_data ():
287286 assert not os .path .exists (
288- os .path .join (
289- openml .config .get_cache_directory (), "datasets" , "1" , "dataset.arff"
290- )
287+ os .path .join (openml .config .get_cache_directory (), "datasets" , "1" , "dataset.arff" )
291288 )
292289
293290 tag = "test_lazy_tag_%d" % random .randint (1 , 1000000 )
@@ -509,12 +506,8 @@ def test_deletion_of_cache_dir(self):
@mock.patch("openml.datasets.functions._get_dataset_description")
def test_deletion_of_cache_dir_faulty_download(self, patch):
    """A failing description download must leave the datasets cache directory empty.

    ``_get_dataset_description`` is patched to raise, ``get_dataset`` is expected
    to propagate that error, and afterwards the datasets cache directory must
    contain no entries.
    """
    patch.side_effect = Exception("Boom!")
    with self.assertRaisesRegex(Exception, "Boom!"):
        openml.datasets.get_dataset(dataset_id=1)

    cache_root = os.path.join(self.workdir, "org", "openml", "test", "datasets")
    leftovers = os.listdir(cache_root)
    assert not leftovers
519512
520513 def test_publish_dataset (self ):
@@ -555,9 +548,7 @@ def test__retrieve_class_labels(self):
555548 # Test workaround for string-typed class labels
556549 custom_ds = openml .datasets .get_dataset (2 )
557550 custom_ds .features [31 ].data_type = "string"
558- labels = custom_ds .retrieve_class_labels (
559- target_name = custom_ds .features [31 ].name
560- )
551+ labels = custom_ds .retrieve_class_labels (target_name = custom_ds .features [31 ].name )
561552 assert labels == ["COIL" , "SHEET" ]
562553
563554 def test_upload_dataset_with_url (self ):
@@ -600,9 +591,7 @@ def test_data_status(self):
600591 )
601592 dataset .publish ()
602593 TestBase ._mark_entity_for_removal ("data" , dataset .id )
603- TestBase .logger .info (
604- "collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id )
605- )
594+ TestBase .logger .info ("collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id ))
606595 did = dataset .id
607596
608597 # admin key for test server (only admins can activate datasets).
@@ -678,8 +667,7 @@ def test_attributes_arff_from_df_unknown_dtype(self):
678667 for arr , dt in zip (data , dtype ):
679668 df = pd .DataFrame (arr )
680669 err_msg = (
681- f"The dtype '{ dt } ' of the column '0' is not currently "
682- "supported by liac-arff"
670+ f"The dtype '{ dt } ' of the column '0' is not currently " "supported by liac-arff"
683671 )
684672 with pytest .raises (ValueError , match = err_msg ):
685673 attributes_arff_from_df (df )
@@ -710,16 +698,12 @@ def test_create_dataset_numpy(self):
710698
711699 dataset .publish ()
712700 TestBase ._mark_entity_for_removal ("data" , dataset .id )
713- TestBase .logger .info (
714- "collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id )
715- )
701+ TestBase .logger .info ("collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id ))
716702
717703 assert (
718704 _get_online_dataset_arff (dataset .id ) == dataset ._dataset
719705 ), "Uploaded arff does not match original one"
720- assert (
721- _get_online_dataset_format (dataset .id ) == "arff"
722- ), "Wrong format for dataset"
706+ assert _get_online_dataset_format (dataset .id ) == "arff" , "Wrong format for dataset"
723707
724708 def test_create_dataset_list (self ):
725709 data = [
@@ -769,15 +753,11 @@ def test_create_dataset_list(self):
769753
770754 dataset .publish ()
771755 TestBase ._mark_entity_for_removal ("data" , dataset .id )
772- TestBase .logger .info (
773- "collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id )
774- )
756+ TestBase .logger .info ("collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id ))
775757 assert (
776758 _get_online_dataset_arff (dataset .id ) == dataset ._dataset
777759 ), "Uploaded ARFF does not match original one"
778- assert (
779- _get_online_dataset_format (dataset .id ) == "arff"
780- ), "Wrong format for dataset"
760+ assert _get_online_dataset_format (dataset .id ) == "arff" , "Wrong format for dataset"
781761
782762 def test_create_dataset_sparse (self ):
783763 # test the scipy.sparse.coo_matrix
@@ -974,9 +954,7 @@ def test_create_dataset_pandas(self):
974954 )
975955 dataset .publish ()
976956 TestBase ._mark_entity_for_removal ("data" , dataset .id )
977- TestBase .logger .info (
978- "collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id )
979- )
957+ TestBase .logger .info ("collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id ))
980958 assert (
981959 _get_online_dataset_arff (dataset .id ) == dataset ._dataset
982960 ), "Uploaded ARFF does not match original one"
@@ -991,9 +969,7 @@ def test_create_dataset_pandas(self):
991969 column_names = ["input1" , "input2" , "y" ]
992970 df = pd .DataFrame .sparse .from_spmatrix (sparse_data , columns = column_names )
993971 # meta-information
994- description = (
995- "Synthetic dataset created from a Pandas DataFrame with Sparse columns"
996- )
972+ description = "Synthetic dataset created from a Pandas DataFrame with Sparse columns"
997973 dataset = openml .datasets .functions .create_dataset (
998974 name = name ,
999975 description = description ,
@@ -1014,15 +990,11 @@ def test_create_dataset_pandas(self):
1014990 )
1015991 dataset .publish ()
1016992 TestBase ._mark_entity_for_removal ("data" , dataset .id )
1017- TestBase .logger .info (
1018- "collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id )
1019- )
993+ TestBase .logger .info ("collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id ))
1020994 assert (
1021995 _get_online_dataset_arff (dataset .id ) == dataset ._dataset
1022996 ), "Uploaded ARFF does not match original one"
1023- assert (
1024- _get_online_dataset_format (dataset .id ) == "sparse_arff"
1025- ), "Wrong format for dataset"
997+ assert _get_online_dataset_format (dataset .id ) == "sparse_arff" , "Wrong format for dataset"
1026998
1027999 # Check that we can overwrite the attributes
10281000 data = [["a" ], ["b" ], ["c" ], ["d" ], ["e" ]]
@@ -1050,13 +1022,9 @@ def test_create_dataset_pandas(self):
10501022 )
10511023 dataset .publish ()
10521024 TestBase ._mark_entity_for_removal ("data" , dataset .id )
1053- TestBase .logger .info (
1054- "collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id )
1055- )
1025+ TestBase .logger .info ("collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id ))
10561026 downloaded_data = _get_online_dataset_arff (dataset .id )
1057- assert (
1058- downloaded_data == dataset ._dataset
1059- ), "Uploaded ARFF does not match original one"
1027+ assert downloaded_data == dataset ._dataset , "Uploaded ARFF does not match original one"
10601028 assert "@ATTRIBUTE rnd_str {a, b, c, d, e, f, g}" in downloaded_data
10611029
10621030 def test_ignore_attributes_dataset (self ):
@@ -1217,9 +1185,7 @@ def test_publish_fetch_ignore_attribute(self):
12171185 # publish dataset
12181186 dataset .publish ()
12191187 TestBase ._mark_entity_for_removal ("data" , dataset .id )
1220- TestBase .logger .info (
1221- "collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id )
1222- )
1188+ TestBase .logger .info ("collected from {}: {}" .format (__file__ .split ("/" )[- 1 ], dataset .id ))
12231189 # test if publish was successful
12241190 assert isinstance (dataset .id , int )
12251191
@@ -1403,9 +1369,7 @@ def test_get_dataset_cache_format_feather(self):
14031369 cache_dir = openml .config .get_cache_directory ()
14041370 cache_dir_for_id = os .path .join (cache_dir , "datasets" , "128" )
14051371 feather_file = os .path .join (cache_dir_for_id , "dataset.feather" )
1406- pickle_file = os .path .join (
1407- cache_dir_for_id , "dataset.feather.attributes.pkl.py3"
1408- )
1372+ pickle_file = os .path .join (cache_dir_for_id , "dataset.feather.attributes.pkl.py3" )
14091373 data = pd .read_feather (feather_file )
14101374 assert os .path .isfile (feather_file ), "Feather file is missing"
14111375 assert os .path .isfile (pickle_file ), "Attributes pickle file is missing"
@@ -1450,19 +1414,15 @@ def test_data_edit_critical_field(self):
14501414 # for this, we need to first clone a dataset to do changes
14511415 did = fork_dataset (1 )
14521416 self ._wait_for_dataset_being_processed (did )
1453- result = edit_dataset (
1454- did , default_target_attribute = "shape" , ignore_attribute = "oil"
1455- )
1417+ result = edit_dataset (did , default_target_attribute = "shape" , ignore_attribute = "oil" )
14561418 assert did == result
14571419
14581420 n_tries = 10
14591421 # we need to wait for the edit to be reflected on the server
14601422 for i in range (n_tries ):
14611423 edited_dataset = openml .datasets .get_dataset (did )
14621424 try :
1463- assert (
1464- edited_dataset .default_target_attribute == "shape"
1465- ), edited_dataset
1425+ assert edited_dataset .default_target_attribute == "shape" , edited_dataset
14661426 assert edited_dataset .ignore_attribute == ["oil" ], edited_dataset
14671427 break
14681428 except AssertionError as e :
@@ -1471,9 +1431,7 @@ def test_data_edit_critical_field(self):
14711431 time .sleep (10 )
14721432 # Delete the cache dir to get the newer version of the dataset
14731433 shutil .rmtree (
1474- os .path .join (
1475- self .workdir , "org" , "openml" , "test" , "datasets" , str (did )
1476- ),
1434+ os .path .join (self .workdir , "org" , "openml" , "test" , "datasets" , str (did )),
14771435 )
14781436
14791437 def test_data_edit_requires_field (self ):
@@ -1564,9 +1522,7 @@ def test_list_datasets_with_high_size_parameter(self):
15641522 openml .config .server = self .production_server
15651523
15661524 datasets_a = openml .datasets .list_datasets (output_format = "dataframe" )
1567- datasets_b = openml .datasets .list_datasets (
1568- output_format = "dataframe" , size = np .inf
1569- )
1525+ datasets_b = openml .datasets .list_datasets (output_format = "dataframe" , size = np .inf )
15701526
15711527 # Reverting to test server
15721528 openml .config .server = self .test_server
@@ -1646,9 +1602,7 @@ def test_invalid_attribute_validations(
16461602 (None , None , ["outlook" , "windy" ]),
16471603 ],
16481604)
1649- def test_valid_attribute_validations (
1650- default_target_attribute , row_id_attribute , ignore_attribute
1651- ):
1605+ def test_valid_attribute_validations (default_target_attribute , row_id_attribute , ignore_attribute ):
16521606 data = [
16531607 ["a" , "sunny" , 85.0 , 85.0 , "FALSE" , "no" ],
16541608 ["b" , "sunny" , 80.0 , 90.0 , "TRUE" , "no" ],
@@ -1749,10 +1703,7 @@ def test_delete_dataset(self):
17491703def test_delete_dataset_not_owned (mock_delete , test_files_directory , test_api_key ):
17501704 openml .config .start_using_configuration_for_example ()
17511705 content_file = (
1752- test_files_directory
1753- / "mock_responses"
1754- / "datasets"
1755- / "data_delete_not_owned.xml"
1706+ test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml"
17561707 )
17571708 mock_delete .return_value = create_request_response (
17581709 status_code = 412 ,
@@ -1774,10 +1725,7 @@ def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_ke
17741725def test_delete_dataset_with_run (mock_delete , test_files_directory , test_api_key ):
17751726 openml .config .start_using_configuration_for_example ()
17761727 content_file = (
1777- test_files_directory
1778- / "mock_responses"
1779- / "datasets"
1780- / "data_delete_has_tasks.xml"
1728+ test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml"
17811729 )
17821730 mock_delete .return_value = create_request_response (
17831731 status_code = 412 ,
@@ -1799,10 +1747,7 @@ def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key
17991747def test_delete_dataset_success (mock_delete , test_files_directory , test_api_key ):
18001748 openml .config .start_using_configuration_for_example ()
18011749 content_file = (
1802- test_files_directory
1803- / "mock_responses"
1804- / "datasets"
1805- / "data_delete_successful.xml"
1750+ test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml"
18061751 )
18071752 mock_delete .return_value = create_request_response (
18081753 status_code = 200 ,
@@ -1821,10 +1766,7 @@ def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key)
18211766def test_delete_unknown_dataset (mock_delete , test_files_directory , test_api_key ):
18221767 openml .config .start_using_configuration_for_example ()
18231768 content_file = (
1824- test_files_directory
1825- / "mock_responses"
1826- / "datasets"
1827- / "data_delete_not_exist.xml"
1769+ test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml"
18281770 )
18291771 mock_delete .return_value = create_request_response (
18301772 status_code = 412 ,
@@ -1861,9 +1803,7 @@ def test_list_datasets(all_datasets: pd.DataFrame):
18611803
18621804
def test_list_datasets_by_tag(all_datasets: pd.DataFrame):
    """Listing datasets tagged ``study_14`` returns some, but not all, datasets.

    The result must be non-empty, strictly smaller than ``all_datasets``, and
    pass the shared id/status checks.
    """
    tagged = openml.datasets.list_datasets(output_format="dataframe", tag="study_14")
    n_tagged = len(tagged)
    n_all = len(all_datasets)
    assert n_tagged > 0 and n_tagged < n_all
    _assert_datasets_have_id_and_valid_status(tagged)
18691809
@@ -2001,15 +1941,16 @@ def test_get_dataset_lazy_behavior(
20011941 with_features = with_features ,
20021942 with_data = with_data ,
20031943 )
2004- assert (
2005- dataset .features
2006- ), "Features should be downloaded on-demand if not during get_dataset"
2007- assert (
2008- dataset .qualities
2009- ), "Qualities should be downloaded on-demand if not during get_dataset"
2010- assert (
2011- dataset .get_data ()
2012- ), "Data should be downloaded on-demand if not during get_dataset"
1944+ assert dataset .features , "Features should be downloaded on-demand if not during get_dataset"
1945+ assert dataset .qualities , "Qualities should be downloaded on-demand if not during get_dataset"
1946+ assert dataset .get_data (), "Data should be downloaded on-demand if not during get_dataset"
20131947 _assert_datasets_retrieved_successfully (
20141948 [1 ], with_qualities = True , with_features = True , with_data = True
20151949 )
1950+
1951+
def test_get_dataset_with_invalid_id() -> None:
    """Requesting a dataset id the server does not know must fail cleanly.

    Expects ``OpenMLServerNoResult`` whose message matches "Unknown dataset"
    and whose ``code`` attribute is 111.
    """
    invalid_id = 123819023109238  # Well, at some point this will probably be valid...
    with pytest.raises(OpenMLServerNoResult, match="Unknown dataset") as e:
        openml.datasets.get_dataset(invalid_id)
    # Deliberately outside the ``with`` block: statements placed after the
    # raising call inside ``pytest.raises`` are never executed, so the error
    # code can only be verified once the context manager has exited.
    assert e.value.code == 111
0 commit comments