Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
5a19931
init feather implementation
sahithyaravi Nov 6, 2019
87907d9
Merge remote-tracking branch 'origin/develop' into feather_investigation
sahithyaravi Nov 8, 2019
6d2f5c3
sparse matrix
sahithyaravi Nov 8, 2019
33881ea
test notebook
sahithyaravi Nov 8, 2019
55743bd
feather pickle compare
sahithyaravi Nov 8, 2019
1437005
test arrow vs feather
sahithyaravi Nov 11, 2019
ef461d7
Merge remote-tracking branch 'origin/develop' into feather_investigation
sahithyaravi Nov 11, 2019
5c27237
add columns condition
sahithyaravi Nov 11, 2019
f61d9b5
Testing
sahithyaravi Nov 14, 2019
484869e
Merge branch 'develop' into feather_investigation
sahithyaravi Jan 6, 2020
3c513b0
get_dataset add cache format
sahithyaravi Jan 8, 2020
0b3d781
add pyarrow
sahithyaravi Jan 8, 2020
a9becf1
sparse matrix check
sahithyaravi Jan 8, 2020
aff8aff
pep8 and remove files
sahithyaravi Jan 8, 2020
48e2a16
return type
sahithyaravi Jan 8, 2020
19c22fe
fix type annotation
sahithyaravi Jan 8, 2020
98be055
value check
sahithyaravi Jan 8, 2020
112eb1d
change feather condition
sahithyaravi Jan 10, 2020
99fac3d
fixes and test
sahithyaravi Jan 11, 2020
cf3cbad
fix errors
sahithyaravi Jan 13, 2020
7583e88
Merge branch 'develop' into feather_investigation
sahithyaravi Jan 13, 2020
09d6bdb
testing file
sahithyaravi Jan 13, 2020
3aff927
feather new file for attributes
sahithyaravi Jan 14, 2020
b521534
change feather attribute file path
sahithyaravi Jan 14, 2020
8eb77cf
delete testing file
sahithyaravi Jan 14, 2020
4894bbd
testing changes
sahithyaravi Jan 14, 2020
b6839b1
delete pkls
sahithyaravi Jan 14, 2020
131bdad
fixes
sahithyaravi Jan 14, 2020
aeb9b98
fixes
sahithyaravi Jan 14, 2020
865d4dc
add comments
sahithyaravi Jan 15, 2020
701496f
change default caching
sahithyaravi Jan 22, 2020
f689897
pip version
sahithyaravi Jan 27, 2020
74f359e
review comment fixes
sahithyaravi Jan 29, 2020
19272e5
newline
sahithyaravi Jan 29, 2020
09a5469
fix if condition
sahithyaravi Jan 29, 2020
f0da5a1
Update install.sh
sahithyaravi Feb 3, 2020
ed8ca7b
pandas version due to sparse data
sahithyaravi Feb 3, 2020
d7488f7
review #2
sahithyaravi Feb 11, 2020
d09c431
Update appveyor.yml
sahithyaravi Feb 17, 2020
bf44356
Update appveyor.yml
sahithyaravi Feb 18, 2020
e6bc0b0
rename cache dir
sahithyaravi Feb 18, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
review #2
  • Loading branch information
sahithyaravi committed Feb 11, 2020
commit d7488f7aa4f5d49c734b546c5cbfd13c00260bcb
1 change: 0 additions & 1 deletion openml/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ def __init__(self, name, description, format=None,
paper_url=None, update_comment=None,
md5_checksum=None, data_file=None, features=None,
qualities=None, dataset=None):
print(cache_format)
if dataset_id is None:
if description and not re.match("^[\x00-\x7F]*$", description):
# not basiclatin (XSD complains)
Expand Down
2 changes: 1 addition & 1 deletion openml/datasets/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,7 +983,7 @@ def _create_dataset_from_description(
features: Dict,
qualities: List,
arff_file: str = None,
cache_format: str = 'pickle'
cache_format: str = 'pickle',
) -> OpenMLDataset:
"""Create a dataset object from a description dict.

Expand Down
17 changes: 13 additions & 4 deletions tests/test_datasets/test_dataset_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1318,8 +1318,6 @@ def test_list_qualities(self):
self.assertEqual(all([isinstance(q, str) for q in qualities]), True)

def test_get_dataset_cache_format_pickle(self):
# Feather format cant be tested without installing pyarrow
# this test case checks if pickle option works
dataset = openml.datasets.get_dataset(1)
self.assertEqual(type(dataset), OpenMLDataset)
self.assertEqual(dataset.name, 'anneal')
Expand All @@ -1333,9 +1331,20 @@ def test_get_dataset_cache_format_pickle(self):
self.assertEqual(len(attribute_names), X.shape[1])

def test_get_dataset_cache_format_feather(self):
# Feather format cant be tested without installing pyarrow
# this test case checks if pickle option works

dataset = openml.datasets.get_dataset('iris', cache_format='feather')
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please add a test that the correct files are actually written to disk?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm afraid this doesn't test the right thing. We change the cache directory for unit tests, so you'd need to use a slightly different data_folder here (https://github.com/openml/openml-python/blob/develop/openml/testing.py#L92)


# Check if dataset is written using feather
data_folder = os.path.join(openml.config.get_cache_directory(), 'datasets',
'128')
feather_file = os.path.join(data_folder, 'dataset.feather')
pickle_file = os.path.join(data_folder, 'dataset.feather.attributes.pkl.py3')
data = pd.read_feather(feather_file)
self.assertTrue(os.path.isfile(feather_file), msg='Feather file is missing')
self.assertTrue(os.path.isfile(pickle_file), msg='Attributes pickle file is missing')
self.assertEqual(data.shape, (150, 5))

# Check if get_data is able to retrieve feather data
self.assertEqual(type(dataset), OpenMLDataset)
self.assertEqual(dataset.name, 'iris')
self.assertGreater(len(dataset.features), 1)
Expand Down