Skip to content

Commit a2a4ade

Browse files
janvanrijn authored and mfeurer committed
[WIP] Add support for Studies (openml#620)
* added study create
* redesigns api call function to put the responsibility which HTTP request to perform with the user
* added benchmark suite functionality
* added request method to read url function
* fixing unit tests
* PEP8 fixes
* adds deletion
* removes left over prints
* study functions
* PEP8 fix
* pep8 fix
* all run ids
* addresses main points of review
* typo fix
* knowledge type -> entity type
* additional check
* PEP8 fixes (I)
* fix PEP8 (II)
* PEP8 (III)
* replaced study delete with status update
* finalized PR
1 parent 96db525 commit a2a4ade

File tree

19 files changed

+621
-137
lines changed

19 files changed

+621
-137
lines changed

openml/_api_calls.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,20 @@
99
OpenMLServerNoResult)
1010

1111

12-
def _perform_api_call(call, data=None, file_elements=None):
12+
def _perform_api_call(call, request_method, data=None, file_elements=None):
1313
"""
1414
Perform an API call at the OpenML server.
1515
1616
Parameters
1717
----------
1818
call : str
1919
The API call. For example data/list
20+
request_method : str
21+
The HTTP request method to perform the API call with. Legal values:
22+
- get (reading functions, api key optional)
23+
- post (writing functions, generally require api key)
24+
- delete (deleting functions, require api key)
25+
See the REST API documentation for which request method is applicable.
2026
data : dict
2127
Dictionary with post-request payload.
2228
file_elements : dict
@@ -38,8 +44,11 @@ def _perform_api_call(call, data=None, file_elements=None):
3844
url = url.replace('=', '%3d')
3945

4046
if file_elements is not None:
47+
if request_method != 'post':
48+
raise ValueError('request method must be post when file elements '
49+
'are present')
4150
return _read_url_files(url, data=data, file_elements=file_elements)
42-
return _read_url(url, data)
51+
return _read_url(url, request_method, data)
4352

4453

4554
def _file_id_to_url(file_id, filename=None):
@@ -78,24 +87,12 @@ def _read_url_files(url, data=None, file_elements=None):
7887
return response.text
7988

8089

81-
def _read_url(url, data=None):
82-
90+
def _read_url(url, request_method, data=None):
8391
data = {} if data is None else data
8492
if config.apikey is not None:
8593
data['api_key'] = config.apikey
8694

87-
if len(data) == 0 or (len(data) == 1 and 'api_key' in data):
88-
response = send_request(
89-
request_method='get', url=url, data=data,
90-
)
91-
92-
else:
93-
# Using requests.post sets header 'Accept-encoding' automatically to
94-
# 'gzip,deflate'
95-
response = send_request(
96-
request_method='post', url=url, data=data,
97-
)
98-
95+
response = send_request(request_method=request_method, url=url, data=data)
9996
if response.status_code != 200:
10097
raise _parse_server_exception(response, url=url)
10198
if 'Content-Encoding' not in response.headers or \
@@ -118,6 +115,8 @@ def send_request(
118115
try:
119116
if request_method == 'get':
120117
response = session.get(url, params=data)
118+
elif request_method == 'delete':
119+
response = session.delete(url, params=data)
121120
elif request_method == 'post':
122121
response = session.post(url, data=data, files=files)
123122
else:

openml/datasets/dataset.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ def push_tag(self, tag):
199199
Tag to attach to the dataset.
200200
"""
201201
data = {'data_id': self.dataset_id, 'tag': tag}
202-
openml._api_calls._perform_api_call("/data/tag", data=data)
202+
openml._api_calls._perform_api_call("/data/tag", 'post', data=data)
203203

204204
def remove_tag(self, tag):
205205
"""Removes a tag from this dataset on the server.
@@ -210,7 +210,7 @@ def remove_tag(self, tag):
210210
Tag to remove from the dataset.
211211
"""
212212
data = {'data_id': self.dataset_id, 'tag': tag}
213-
openml._api_calls._perform_api_call("/data/untag", data=data)
213+
openml._api_calls._perform_api_call("/data/untag", 'post', data=data)
214214

215215
def __eq__(self, other):
216216

@@ -531,7 +531,7 @@ def publish(self):
531531
raise ValueError("No path/url to the dataset file was given")
532532

533533
return_value = openml._api_calls._perform_api_call(
534-
"data/",
534+
"data/", 'post',
535535
file_elements=file_elements,
536536
)
537537
self.dataset_id = int(xmltodict.parse(return_value)['oml:upload_data_set']['oml:id'])

openml/datasets/functions.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ def _list_datasets(**kwargs):
232232

233233
def __list_datasets(api_call):
234234

235-
xml_string = openml._api_calls._perform_api_call(api_call)
235+
xml_string = openml._api_calls._perform_api_call(api_call, 'get')
236236
datasets_dict = xmltodict.parse(xml_string, force_list=('oml:dataset',))
237237

238238
# Minimalistic check if the XML is useful
@@ -621,6 +621,7 @@ def status_update(data_id, status):
621621
'Legal values: %s' % legal_status)
622622
data = {'data_id': data_id, 'status': status}
623623
result_xml = openml._api_calls._perform_api_call("data/status/update",
624+
'post',
624625
data=data)
625626
result = xmltodict.parse(result_xml)
626627
server_data_id = result['oml:data_status_update']['oml:id']
@@ -659,7 +660,8 @@ def _get_dataset_description(did_cache_dir, dataset_id):
659660
try:
660661
return _get_cached_dataset_description(dataset_id)
661662
except OpenMLCacheException:
662-
dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id)
663+
url_suffix = "data/%d" % dataset_id
664+
dataset_xml = openml._api_calls._perform_api_call(url_suffix, 'get')
663665
with io.open(description_file, "w", encoding='utf8') as fh:
664666
fh.write(dataset_xml)
665667

@@ -704,7 +706,7 @@ def _get_dataset_arff(did_cache_dir, description):
704706
pass
705707

706708
url = description['oml:url']
707-
arff_string = openml._api_calls._read_url(url)
709+
arff_string = openml._api_calls._read_url(url, request_method='get')
708710
md5 = hashlib.md5()
709711
md5.update(arff_string.encode('utf-8'))
710712
md5_checksum = md5.hexdigest()
@@ -751,7 +753,8 @@ def _get_dataset_features(did_cache_dir, dataset_id):
751753
with io.open(features_file, encoding='utf8') as fh:
752754
features_xml = fh.read()
753755
except (OSError, IOError):
754-
features_xml = openml._api_calls._perform_api_call("data/features/%d" % dataset_id)
756+
url_suffix = "data/features/%d" % dataset_id
757+
features_xml = openml._api_calls._perform_api_call(url_suffix, 'get')
755758

756759
with io.open(features_file, "w", encoding='utf8') as fh:
757760
fh.write(features_xml)
@@ -787,7 +790,8 @@ def _get_dataset_qualities(did_cache_dir, dataset_id):
787790
with io.open(qualities_file, encoding='utf8') as fh:
788791
qualities_xml = fh.read()
789792
except (OSError, IOError):
790-
qualities_xml = openml._api_calls._perform_api_call("data/qualities/%d" % dataset_id)
793+
url_suffix = "data/qualities/%d" % dataset_id
794+
qualities_xml = openml._api_calls._perform_api_call(url_suffix, 'get')
791795

792796
with io.open(qualities_file, "w", encoding='utf8') as fh:
793797
fh.write(qualities_xml)
@@ -859,11 +863,13 @@ def _get_online_dataset_arff(dataset_id):
859863
str
860864
A string representation of an ARFF file.
861865
"""
862-
dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id)
866+
dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id,
867+
'get')
863868
# build a dict from the xml.
864869
# use the url from the dataset description and return the ARFF string
865870
return openml._api_calls._read_url(
866-
xmltodict.parse(dataset_xml)['oml:data_set_description']['oml:url']
871+
xmltodict.parse(dataset_xml)['oml:data_set_description']['oml:url'],
872+
request_method='get'
867873
)
868874

869875

@@ -881,7 +887,8 @@ def _get_online_dataset_format(dataset_id):
881887
str
882888
Dataset format.
883889
"""
884-
dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id)
890+
dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id,
891+
'get')
885892
# build a dict from the xml and get the format from the dataset description
886893
return xmltodict\
887894
.parse(dataset_xml)['oml:data_set_description']['oml:format']\

openml/evaluations/functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def _list_evaluations(function, id=None, task=None,
100100

101101
def __list_evaluations(api_call):
102102
"""Helper function to parse API calls which are lists of runs"""
103-
xml_string = openml._api_calls._perform_api_call(api_call)
103+
xml_string = openml._api_calls._perform_api_call(api_call, 'get')
104104
evals_dict = xmltodict.parse(xml_string, force_list=('oml:evaluation',))
105105
# Minimalistic check if the XML is useful
106106
if 'oml:evaluations' not in evals_dict:

openml/flows/flow.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ def publish(self):
331331
file_elements = {'description': xml_description}
332332
return_value = openml._api_calls._perform_api_call(
333333
"flow/",
334+
'post',
334335
file_elements=file_elements,
335336
)
336337
flow_id = int(xmltodict.parse(return_value)['oml:upload_flow']['oml:id'])
@@ -414,7 +415,7 @@ def push_tag(self, tag):
414415
Tag to attach to the flow.
415416
"""
416417
data = {'flow_id': self.flow_id, 'tag': tag}
417-
openml._api_calls._perform_api_call("/flow/tag", data=data)
418+
openml._api_calls._perform_api_call("/flow/tag", 'post', data=data)
418419

419420
def remove_tag(self, tag):
420421
"""Removes a tag from this flow on the server.
@@ -425,7 +426,7 @@ def remove_tag(self, tag):
425426
Tag to remove from the flow.
426427
"""
427428
data = {'flow_id': self.flow_id, 'tag': tag}
428-
openml._api_calls._perform_api_call("/flow/untag", data=data)
429+
openml._api_calls._perform_api_call("/flow/untag", 'post', data=data)
429430

430431

431432
def _copy_server_fields(source_flow, target_flow):

openml/flows/functions.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ def get_flow(flow_id, reinstantiate=False):
2626
the flow
2727
"""
2828
flow_id = int(flow_id)
29-
flow_xml = openml._api_calls._perform_api_call("flow/%d" % flow_id)
29+
flow_xml = openml._api_calls._perform_api_call("flow/%d" % flow_id,
30+
'get')
3031

3132
flow_dict = xmltodict.parse(flow_xml)
3233
flow = OpenMLFlow._from_dict(flow_dict)
@@ -125,6 +126,7 @@ def flow_exists(name, external_version):
125126

126127
xml_response = openml._api_calls._perform_api_call(
127128
"flow/exists",
129+
'post',
128130
data={'name': name, 'external_version': external_version},
129131
)
130132

@@ -138,7 +140,7 @@ def flow_exists(name, external_version):
138140

139141
def __list_flows(api_call):
140142

141-
xml_string = openml._api_calls._perform_api_call(api_call)
143+
xml_string = openml._api_calls._perform_api_call(api_call, 'get')
142144
flows_dict = xmltodict.parse(xml_string, force_list=('oml:flow',))
143145

144146
# Minimalistic check if the XML is useful

openml/runs/functions.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,8 @@ def get_run_trace(run_id):
219219
-------
220220
openml.runs.OpenMLTrace
221221
"""
222-
trace_xml = openml._api_calls._perform_api_call('run/trace/%d' % run_id)
222+
trace_xml = openml._api_calls._perform_api_call('run/trace/%d' % run_id,
223+
'get')
223224
run_trace = OpenMLRunTrace.trace_from_xml(trace_xml)
224225
return run_trace
225226

@@ -838,8 +839,9 @@ def get_run(run_id):
838839
try:
839840
return _get_cached_run(run_id)
840841

841-
except OpenMLCacheException:
842-
run_xml = openml._api_calls._perform_api_call("run/%d" % run_id)
842+
except (OpenMLCacheException):
843+
run_xml = openml._api_calls._perform_api_call("run/%d" % run_id,
844+
'get')
843845
with io.open(run_file, "w", encoding='utf8') as fh:
844846
fh.write(run_xml)
845847

@@ -1118,7 +1120,7 @@ def _list_runs(id=None, task=None, setup=None,
11181120

11191121
def __list_runs(api_call):
11201122
"""Helper function to parse API calls which are lists of runs"""
1121-
xml_string = openml._api_calls._perform_api_call(api_call)
1123+
xml_string = openml._api_calls._perform_api_call(api_call, 'get')
11221124
runs_dict = xmltodict.parse(xml_string, force_list=('oml:run',))
11231125
# Minimalistic check if the XML is useful
11241126
if 'oml:runs' not in runs_dict:

openml/runs/run.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,9 @@ def get_metric_fn(self, sklearn_fn, kwargs={}):
265265
predictions_file_url = openml._api_calls._file_id_to_url(
266266
self.output_files['predictions'], 'predictions.arff',
267267
)
268-
predictions_arff = \
269-
arff.loads(openml._api_calls._read_url(predictions_file_url))
268+
response = openml._api_calls._read_url(predictions_file_url,
269+
request_method='get')
270+
predictions_arff = arff.loads(response)
270271
# TODO: make this a stream reader
271272
else:
272273
raise ValueError('Run should have been locally executed or '
@@ -398,12 +399,11 @@ def publish(self):
398399
trace_arff = arff.dumps(self.trace.trace_to_arff())
399400
file_elements['trace'] = ("trace.arff", trace_arff)
400401

401-
return_value = \
402-
openml._api_calls._perform_api_call("/run/",
403-
file_elements=file_elements)
404-
run_id = \
405-
int(xmltodict.parse(return_value)['oml:upload_run']['oml:run_id'])
406-
self.run_id = run_id
402+
return_value = openml._api_calls._perform_api_call(
403+
"/run/", 'post', file_elements=file_elements
404+
)
405+
result = xmltodict.parse(return_value)
406+
self.run_id = int(result['oml:upload_run']['oml:run_id'])
407407
return self
408408

409409
def _create_description_xml(self):
@@ -440,7 +440,7 @@ def push_tag(self, tag):
440440
Tag to attach to the run.
441441
"""
442442
data = {'run_id': self.run_id, 'tag': tag}
443-
openml._api_calls._perform_api_call("/run/tag", data=data)
443+
openml._api_calls._perform_api_call("/run/tag", 'post', data=data)
444444

445445
def remove_tag(self, tag):
446446
"""Removes a tag from this run on the server.
@@ -451,7 +451,7 @@ def remove_tag(self, tag):
451451
Tag to remove from the run.
452452
"""
453453
data = {'run_id': self.run_id, 'tag': tag}
454-
openml._api_calls._perform_api_call("/run/untag", data=data)
454+
openml._api_calls._perform_api_call("/run/untag", 'post', data=data)
455455

456456

457457
###############################################################################

openml/setups/functions.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def setup_exists(flow):
4646
pretty=True)
4747
file_elements = {'description': ('description.arff', description)}
4848
result = openml._api_calls._perform_api_call('/setup/exists/',
49+
'post',
4950
file_elements=file_elements)
5051
result_dict = xmltodict.parse(result)
5152
setup_id = int(result_dict['oml:setup_exists']['oml:id'])
@@ -95,7 +96,8 @@ def get_setup(setup_id):
9596
return _get_cached_setup(setup_id)
9697

9798
except (openml.exceptions.OpenMLCacheException):
98-
setup_xml = openml._api_calls._perform_api_call('/setup/%d' % setup_id)
99+
url_suffix = '/setup/%d' % setup_id
100+
setup_xml = openml._api_calls._perform_api_call(url_suffix, 'get')
99101
with io.open(setup_file, "w", encoding='utf8') as fh:
100102
fh.write(setup_xml)
101103

@@ -155,7 +157,7 @@ def _list_setups(setup=None, **kwargs):
155157

156158
def __list_setups(api_call):
157159
"""Helper function to parse API calls which are lists of setups"""
158-
xml_string = openml._api_calls._perform_api_call(api_call)
160+
xml_string = openml._api_calls._perform_api_call(api_call, 'get')
159161
setups_dict = xmltodict.parse(xml_string, force_list=('oml:setup',))
160162
# Minimalistic check if the XML is useful
161163
if 'oml:setups' not in setups_dict:

openml/study/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,9 @@
11
from .study import OpenMLStudy
2-
from .functions import get_study
2+
from .functions import get_study, create_study, create_benchmark_suite, \
3+
status_update, attach_to_study, detach_from_study, delete_study
4+
5+
6+
__all__ = [
7+
'OpenMLStudy', 'attach_to_study', 'create_benchmark_suite', 'create_study',
8+
'delete_study', 'detach_from_study', 'get_study', 'status_update'
9+
]

0 commit comments

Comments
 (0)