Skip to content

Commit e5b2bfe

Browse files
committed
Merge pull request #72 from JasonShin/feature/datasets-boston
1 parent c8ab31a commit e5b2bfe

10 files changed

Lines changed: 9260 additions & 4 deletions

File tree

docs/processor/ExampleProcessor.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ export class ExampleProcessor extends BaseProcesser {
4141
};
4242
});
4343
const extraConfig = {
44-
exampleSidebar: config,
44+
exampleSidebar: config
4545
};
4646
// Writing extraConfig object as .vuepress/exampleExtra.json
4747
fs.writeFileSync(this.vuepressExampleConfigPath, JSON.stringify(extraConfig), 'utf-8');

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,11 @@
3535
"OneHotEncoding"
3636
],
3737
"scripts": {
38-
"build": "yarn clean && yarn build:main && yarn build:module && yarn build:web",
38+
"build": "yarn clean && yarn build:main && yarn build:module && yarn build:web && yarn build:datasets",
3939
"build:main": "npx tsc -p tsconfig.json && npx copy package.json ./build/lib && npx copy README.md ./build/lib && npx copy .npmignore ./build/lib",
4040
"build:module": "npx tsc -p tsconfig.module.json",
4141
"build:web": "npx tsc -p tsconfig.json --module amd --out build/lib/kalimdor.web.js",
42+
"build:datasets": "cp -r src/lib/datasets/data build/lib/datasets/data",
4243
"fix": "yarn fix:prettier && yarn fix:tslint",
4344
"fix:prettier": "npx prettier \"src/**/*.ts\" \"test/**/*.ts\" \"docs/**/*.ts\" --config ./.prettierrc --write",
4445
"fix:tslint": "npx tslint --fix --force -t verbose 'test/**/*.ts' 'src/**/*.ts' 'docs/**/*.ts'",

src/lib/datasets/BaseDataset.ts

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import * as fs from 'fs-extra';
12
import 'isomorphic-fetch';
23
import { parseInt, uniqBy } from 'lodash';
4+
import * as path from 'path';
35
import { LabelEncoder } from '../preprocessing/label';
46

57
/**
@@ -57,6 +59,24 @@ export class BaseDataset {
5759
};
5860
}
5961

62+
/**
 * Loads a dataset that ships with the library from `data/<type>/train.csv`,
 * resolved relative to this module's directory, and passes the raw file
 * contents to `processCSV`.
 *
 * The file is read asynchronously: this method is already `async`, so a
 * blocking read would stall the event loop for every awaiting caller.
 * A read failure (for example a missing file) rejects the returned promise.
 *
 * @param type - name of the dataset folder under `data/` (e.g. `boston`)
 * @param options - parsing options, forwarded to `processCSV` unchanged
 * @param options.delimiter - column delimiter; defaults to `,`
 * @param options.lastIsTarget - forwarded as `lastIsTarget`; defaults to `true`
 * @param options.trainType - forwarded as `trainType`; defaults to `float`
 * @param options.targetType - forwarded as `targetType`; defaults to `float`
 * @returns the `{ data, targets, labels }` result produced by `processCSV`
 */
protected async fsLoad(
  type: string,
  { delimiter = ',', lastIsTarget = true, trainType = 'float', targetType = 'float' } = {
    // Default object if nothing is provided
    delimiter: ',',
    lastIsTarget: true,
    trainType: 'float',
    targetType: 'float'
  }
): Promise<{ data; targets; labels }> {
  // Make sure the actual data is located under data/type
  const csvPath = path.join(__dirname, `data/${type}/train.csv`);
  // fs-extra returns a promise when no callback is passed, so this does not block.
  const data = await fs.readFile(csvPath, 'utf8');
  return this.processCSV(data, delimiter, lastIsTarget, trainType, targetType);
}
79+
6080
/**
6181
* Processes CSV type dataset. Returns a training and testing data pair
6282
* @param data - a raw string data
@@ -119,7 +139,6 @@ export class BaseDataset {
119139
} else if (targetType === 'float') {
120140
result[1] = result[1].map(parseFloat);
121141
}
122-
123142
return {
124143
data: result[0],
125144
targets,

src/lib/datasets/Boston.ts

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/**
2+
* References:
3+
* - https://www.kaggle.com/c/boston-housing/data
4+
* - https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html
5+
*/
6+
7+
import { BaseDataset } from './BaseDataset';
8+
9+
/**
10+
* This dataset contains information collected by the U.S. Census Service concerning housing in the area of Boston, Mass.
11+
* It was obtained from the StatLib archive (http://lib.stat.cmu.edu/datasets/boston),
12+
* and has been used extensively throughout the literature to benchmark algorithms.
13+
* However, these comparisons were primarily done outside of Delve and are thus somewhat suspect.
14+
* The dataset is small in size with only 506 cases.
15+
*
16+
* The data was originally published by Harrison, D. and Rubinfeld, D.L.
17+
* "Hedonic prices and the demand for clean air", J. Environ. Economics & Management, vol.5, 81-102, 1978.
18+
*
19+
* @example
20+
* import { Boston } from "kalimdor/datasets";
21+
*
22+
* (async function() {
23+
* const bostonData = new Boston();
24+
* const {
25+
* data,
26+
* targets,
27+
* labels,
28+
* } = await bostonData.load();
29+
* })();
30+
*
31+
*/
32+
export class Boston extends BaseDataset {
  /**
   * Reads the bundled `boston` CSV through the base-class file loader and
   * returns its training rows, targets and labels.
   */
  public async load(): Promise<{
    /** Training data */
    data: any[][];
    /** Target data */
    targets: any[];
    /** Real labels */
    labels: string[];
  }> {
    const loaded = await this.fsLoad('boston');
    // Expose only the three documented fields of the loader's result.
    return {
      data: loaded.data,
      targets: loaded.targets,
      labels: loaded.labels
    };
  }
}

src/lib/datasets/Iris.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import { BaseDataset } from './BaseDataset';
2323
* labels, // list of labels
2424
* targetNames, // list of short target labels
2525
* description // dataset description
26-
* } = irisData.load(); // loads the data internally
26+
* } = await irisData.load(); // loads the data internally
2727
* })();
2828
*
2929
*/

src/lib/datasets/data/boston/train.csv

Lines changed: 506 additions & 0 deletions
Large diffs are not rendered by default.

src/lib/datasets/index.repl.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,10 @@ irisData.load().then(data => {
66
const { description } = data;
77
console.log('checking desc', description);
88
});
9+
10+
import { Boston } from './Boston';
11+
12+
// Manual smoke check: load the bundled Boston dataset and print the parsed result.
const bostonData = new Boston();
bostonData.load().then(loaded => {
  console.log(loaded);
});

test/datasets/Boston.test.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import { Boston } from '../../src/lib/datasets/Boston';
2+
3+
// Snapshot tests for the Boston dataset loader. Each test loads the dataset
// and compares one field of the result against the stored Jest snapshot.
// The test titles double as snapshot keys, so they must not be reworded.
describe('datasets:Boston', () => {
  // Typed holder rather than an untyped `null`, so calls on it are checked
  // against the Boston class instead of silently being `any`.
  let boston: Boston;

  beforeAll(() => {
    boston = new Boston();
  });

  it('should data match the snapshot', async () => {
    const { data } = await boston.load();
    expect(data).toMatchSnapshot();
  });

  it('should targets match the snapshot', async () => {
    const { targets } = await boston.load();
    expect(targets).toMatchSnapshot();
  });

  it('should labels match the snapshot', async () => {
    const { labels } = await boston.load();
    expect(labels).toMatchSnapshot();
  });
});

0 commit comments

Comments
 (0)