Skip to content

Commit 948aebe

Browse files
committed
Refactoring __str__ to remove redundancies
1 parent 90f425f commit 948aebe

File tree

7 files changed

+203
-160
lines changed

7 files changed

+203
-160
lines changed

openml/datasets/dataset.py

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -174,29 +174,32 @@ def __init__(self, name, description, format=None,
174174
self.data_pickle_file = None
175175

176176
def __str__(self):
    """Return an aligned, human-readable summary of this dataset.

    The result is an underlined "OpenML Dataset" header followed by one
    ``label....: value`` line per field. Fields whose value is missing
    (``None``/NaN) are omitted, and labels are dot-padded to the width of
    the longest label that is actually printed.

    Returns
    -------
    str
    """
    header = "OpenML Dataset"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Drops as many trailing characters as 'api/v1/xml' has; this assumes the
    # configured server URL ends with exactly that suffix.
    base_url = openml.config.server[:-len('api/v1/xml')]
    fields = {"Name": self.name,
              "Version": self.version,
              "Format": self.format,
              "Upload Date": self.upload_date.replace('T', ' '),
              "Licence": self.licence,
              "Download URL": self.url,
              "OpenML URL": "{}d/{}".format(base_url, self.dataset_id),
              "Data file": self.data_file,
              "Pickle file": self.data_pickle_file,
              "# of features": len(self.features)}

    if self.qualities['NumberOfInstances'] is not None:
        # Assign into the dict: the previous Series.append() call returned a
        # new Series that was thrown away, so this row never appeared.
        fields["# of instances"] = int(self.qualities['NumberOfInstances'])

    # Every label here must match a key of `fields` exactly: reindex() fills an
    # unknown label with NaN and dropna() then silently removes that row.
    order = ["Name", "Version", "Format", "Upload Date", "Licence", "Download URL",
             "OpenML URL", "Data file", "Pickle file", "# of features",
             "# of instances"]
    # Series.items() is used instead of iteritems(), which pandas 2.0 removed.
    rows = list(pd.Series(fields).reindex(order).dropna().items())

    longest_field_name_length = max(len(name) for name, _ in rows)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in rows)
    return header + body
200203

201204
def _data_arff_to_pickle(self, data_file):
202205
data_pickle_file = data_file.replace('.arff', '.pkl.py3')

openml/evaluations/evaluation.py

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import openml.config
2+
import pandas as pd
3+
14

25
class OpenMLEvaluation(object):
36
"""
@@ -49,35 +52,30 @@ def __init__(self, run_id, task_id, setup_id, flow_id, flow_name,
4952
self.array_data = array_data
5053

5154
def __str__(self):
    """Return an aligned, human-readable summary of this evaluation.

    The result is an underlined "OpenML Evaluation" header followed by one
    ``label....: value`` line per field. Fields whose value is missing
    (``None``/NaN) are omitted, and labels are dot-padded to the width of
    the longest label that is actually printed.

    Returns
    -------
    str
    """
    header = "OpenML Evaluation"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Drops as many trailing characters as 'api/v1/xml' has; this assumes the
    # configured server URL ends with exactly that suffix.
    base_url = openml.config.server[:-len('api/v1/xml')]
    fields = {"Upload Date": self.upload_time,
              "Run ID": self.run_id,
              "OpenML Run URL": "{}r/{}".format(base_url, self.run_id),
              "Task ID": self.task_id,
              "OpenML Task URL": "{}t/{}".format(base_url, self.task_id),
              "Flow ID": self.flow_id,
              # Restored: the pre-refactor output printed the flow name.
              "Flow Name": self.flow_name,
              "OpenML Flow URL": "{}f/{}".format(base_url, self.flow_id),
              "Setup ID": self.setup_id,
              "Data ID": self.data_id,
              "Data Name": self.data_name,
              "OpenML Data URL": "{}d/{}".format(base_url, self.data_id),
              "Metric Used": self.function,
              "Result": self.value}

    # Every label here must match a key of `fields` exactly: reindex() fills an
    # unknown label with NaN and dropna() then silently removes that row.
    # (Previously "Uploader Date" and a missing comma between
    # "OpenML Task URL" and "Flow ID" caused three rows to vanish.)
    order = ["Upload Date", "Run ID", "OpenML Run URL", "Task ID", "OpenML Task URL",
             "Flow ID", "Flow Name", "OpenML Flow URL", "Setup ID", "Data ID",
             "Data Name", "OpenML Data URL", "Metric Used", "Result"]
    # Series.items() is used instead of iteritems(), which pandas 2.0 removed.
    rows = list(pd.Series(fields).reindex(order).dropna().items())

    longest_field_name_length = max(len(name) for name, _ in rows)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in rows)
    return header + body

openml/flows/flow.py

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
from ..extensions import get_extension_by_flow
88
from ..utils import extract_xml_tags, _tag_entity
99

10+
import openml.config
11+
import pandas as pd
12+
1013

1114
class OpenMLFlow(object):
1215
"""OpenML Flow. Stores machine learning models.
@@ -133,27 +136,27 @@ def __init__(self, name, description, model, components, parameters,
133136
self.extension = get_extension_by_flow(self)
134137

135138
def __str__(self):
    """Return an aligned, human-readable summary of this flow.

    The result is an underlined "OpenML Flow" header followed by one
    ``label....: value`` line per field. The "Binary URL" row is only
    present when ``self.binary_url`` is not ``None``; any other field whose
    value is missing (``None``/NaN) is omitted as well. Labels are
    dot-padded to the width of the longest label that is actually printed.

    Returns
    -------
    str
    """
    header = "OpenML Flow"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Drops as many trailing characters as 'api/v1/xml' has; this assumes the
    # configured server URL ends with exactly that suffix.
    base_url = openml.config.server[:-len('api/v1/xml')]
    fields = {"Flow ID": "{} (version {})".format(self.flow_id, self.version),
              "Flow URL": "{}f/{}".format(base_url, self.flow_id),
              "Flow Name": self.name,
              "Flow Description": self.description,
              "Upload Date": self.upload_date.replace('T', ' '),
              "Dependencies": self.dependencies}
    if self.binary_url is not None:
        # Plain dict assignment replaces Series.append(), removed in pandas 2.0.
        fields["Binary URL"] = self.binary_url

    # Every label here must match a key of `fields` exactly: reindex() fills an
    # unknown label with NaN and dropna() then silently removes that row.
    order = ["Flow ID", "Flow URL", "Flow Name", "Flow Description", "Binary URL",
             "Upload Date", "Dependencies"]
    # Series.items() is used instead of iteritems(), which pandas 2.0 removed.
    rows = list(pd.Series(fields).reindex(order).dropna().items())

    longest_field_name_length = max(len(name) for name, _ in rows)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in rows)
    return header + body
157160

158161
def _to_xml(self) -> str:
159162
"""Generate xml representation of self for upload to server.

openml/runs/run.py

Lines changed: 32 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import arff
88
import numpy as np
99
import xmltodict
10+
import pandas as pd
1011

1112
import openml
1213
import openml._api_calls
@@ -64,43 +65,38 @@ def __init__(self, task_id, flow_id, dataset_id, setup_string=None,
6465
self.predictions_url = predictions_url
6566

6667
def __str__(self):
    """Return an aligned, human-readable summary of this run.

    The result is an underlined "OpenML Run" header followed by one
    ``label....: value`` line per field. The "Result" row is only present
    when ``self.task_evaluation_measure`` is a key of ``self.evaluations``;
    any other field whose value is missing (``None``/NaN) is omitted as
    well. Labels are dot-padded to the width of the longest label that is
    actually printed.

    Returns
    -------
    str
    """
    header = "OpenML Run"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Drops as many trailing characters as 'api/v1/xml' has; this assumes the
    # configured server URL ends with exactly that suffix.
    base_url = openml.config.server[:-len('api/v1/xml')]
    fields = {"Uploader Name": self.uploader_name,
              "Uploader Profile": "{}u/{}".format(base_url, self.uploader),
              "Metric": self.task_evaluation_measure,
              "Run ID": self.run_id,
              "Run URL": "{}r/{}".format(base_url, self.run_id),
              "Task ID": self.task_id,
              "Task Type": self.task_type,
              # The task page is addressed by the task id (was: run id).
              "Task URL": "{}t/{}".format(base_url, self.task_id),
              "Flow ID": self.flow_id,
              "Flow Name": self.flow_name,
              "Flow URL": "{}f/{}".format(base_url, self.flow_id),
              "Setup ID": self.setup_id,
              "Setup String": self.setup_string,
              "Dataset ID": self.dataset_id,
              "Dataset URL": "{}d/{}".format(base_url, self.dataset_id)}
    if self.task_evaluation_measure in self.evaluations:
        # Plain dict assignment replaces Series.append(), removed in pandas 2.0.
        fields["Result"] = self.evaluations[self.task_evaluation_measure]

    # Every label here must match a key of `fields` exactly: reindex() fills an
    # unknown label with NaN and dropna() then silently removes that row.
    order = ["Uploader Name", "Uploader Profile", "Metric", "Result", "Run ID", "Run URL",
             "Task ID", "Task Type", "Task URL", "Flow ID", "Flow Name", "Flow URL",
             "Setup ID", "Setup String", "Dataset ID", "Dataset URL"]
    # Series.items() is used instead of iteritems(), which pandas 2.0 removed.
    rows = list(pd.Series(fields).reindex(order).dropna().items())

    longest_field_name_length = max(len(name) for name, _ in rows)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in rows)
    return header + body
104100

105101
def _repr_pretty_(self, pp, cycle):
106102
pp.text(str(self))

openml/setups/setup.py

Lines changed: 44 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import openml.config
2+
import pandas as pd
3+
14

25
class OpenMLSetup(object):
36
"""Setup object (a.k.a. Configuration).
@@ -26,17 +29,21 @@ def __init__(self, setup_id, flow_id, parameters):
2629
self.parameters = parameters
2730

2831
def __str__(self):
    """Return an aligned, human-readable summary of this setup.

    The result is an underlined "OpenML Setup" header followed by one
    ``label....: value`` line per field. Fields whose value is missing
    (``None``/NaN) are omitted, and labels are dot-padded to the width of
    the longest label that is actually printed.

    Returns
    -------
    str
    """
    header = "OpenML Setup"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Drops as many trailing characters as 'api/v1/xml' has; this assumes the
    # configured server URL ends with exactly that suffix.
    base_url = openml.config.server[:-len('api/v1/xml')]
    fields = {"Setup ID": self.setup_id,
              "Flow ID": self.flow_id,
              "Flow URL": "{}f/{}".format(base_url, self.flow_id),
              "# of Parameters": len(self.parameters)}

    # Every label here must match a key of `fields` exactly: reindex() fills an
    # unknown label with NaN and dropna() then silently removes that row.
    order = ["Setup ID", "Flow ID", "Flow URL", "# of Parameters"]
    # Series.items() is used instead of iteritems(), which pandas 2.0 removed.
    rows = list(pd.Series(fields).reindex(order).dropna().items())

    longest_field_name_length = max(len(name) for name, _ in rows)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in rows)
    return header + body
4047

4148

4249
class OpenMLParameter(object):
@@ -75,20 +82,31 @@ def __init__(self, input_id, flow_id, flow_name, full_name, parameter_name,
7582
self.value = value
7683

7784
def __str__(self):
    """Return an aligned, human-readable summary of this parameter.

    The result is an underlined "OpenML Parameter" header followed by one
    ``label....: value`` line per field. The data type, default and value
    of the parameter are printed as indented sub-entries below
    "Parameter Name". Fields whose value is missing (``None``/NaN) are
    omitted, and labels are dot-padded to the width of the longest label
    that is actually printed.

    Returns
    -------
    str
    """
    header = "OpenML Parameter"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Indented labels for the parameter attributes:
    # indentation = 2 spaces + 1 '|' + 2 underscores.
    indent = "{}|{}".format(" " * 2, "_" * 2)
    parameter_data_type = "{}Data Type".format(indent)
    parameter_default = "{}Default".format(indent)
    parameter_value = "{}Value".format(indent)

    # Drops as many trailing characters as 'api/v1/xml' has; this assumes the
    # configured server URL ends with exactly that suffix.
    base_url = openml.config.server[:-len('api/v1/xml')]
    fields = {"ID": self.id,
              "Flow ID": self.flow_id,
              # NOTE(review): the "Flow Name" label is filled from full_name,
              # not flow_name -- confirm this is intended.
              "Flow Name": self.full_name,
              "Flow URL": "{}f/{}".format(base_url, self.flow_id),
              "Parameter Name": self.parameter_name,
              parameter_data_type: self.data_type,
              parameter_default: self.default_value,
              parameter_value: self.value}

    # Every label here must match a key of `fields` exactly: reindex() fills an
    # unknown label with NaN and dropna() then silently removes that row.
    order = ["ID", "Flow ID", "Flow Name", "Flow URL", "Parameter Name",
             parameter_data_type, parameter_default, parameter_value]
    # Series.items() is used instead of iteritems(), which pandas 2.0 removed
    # (as it did Series.append(), previously used to add the sub-entries).
    rows = list(pd.Series(fields).reindex(order).dropna().items())

    longest_field_name_length = max(len(name) for name, _ in rows)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in rows)
    return header + body

0 commit comments

Comments
 (0)