Skip to content

Commit 9f8bea5

Browse files
typhoonzero authored and terrytangyuan committed
[ModelZoo] Test model works with sqlflow and edl (#27)
* add models base image * wip * wip * test ci * test ci * test ci * test ci * test ci * test ci * test ci * test ci * test edl ci * fix * test ci * update * compatible with ci * update * secure keys * just test * test ci * test ci * update * test elasticdl ci * test run edl * update * secure keys * update
1 parent a7b6dbe commit 9f8bea5

15 files changed

Lines changed: 173 additions & 48 deletions

.travis.yml

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,17 +4,32 @@ branches:
44
only:
55
- master
66
- develop
7-
# https://docs.travis-ci.com/user/customizing-the-build/#safelisting-or-blocklisting-branches
8-
# safe list can prevent tag building, add rexp to detect tag
97
- "/^v\\d+\\.\\d+(\\.\\d+)?(-\\S*)?$/"
10-
118
language: python
129
python:
1310
- 3.6
1411
- 3.7
15-
12+
service:
13+
- docker
1614
install:
1715
- python setup.py -q install
18-
1916
script:
2017
- python setup.py -q test
18+
19+
jobs:
20+
include:
21+
- stage: ElasticDLTest
22+
script:
23+
- cd base_image && docker build -t sqlflow/modelzoo_base . && cd ..
24+
- cd sqlflow_models && docker build -t sqlflow/sqlflow_models . && cd ..
25+
- curl -s https://raw.githubusercontent.com/sql-machine-learning/elasticdl/4a995fe7eaf91bc5a9d50181e9aaaa14d15c8a09/scripts/setup_k8s_env.sh | bash
26+
- kubectl apply -f https://raw.githubusercontent.com/sql-machine-learning/elasticdl/develop/elasticdl/manifests/examples/elasticdl-rbac.yaml
27+
- docker run --rm -it --net=host
28+
-v $HOME/.kube:/root/.kube
29+
-v /home/$USER/.minikube/:/home/$USER/.minikube/
30+
-v /var/run/docker.sock:/var/run/docker.sock
31+
-v $PWD:/workspace
32+
-e ODPS_ACCESS_ID=$MAXCOMPUTE_AK
33+
-e ODPS_ACCESS_KEY=$MAXCOMPUTE_SK
34+
sqlflow/sqlflow_models bash /workspace/scripts/test_elasticdl_submit.sh
35+
- curl -s https://raw.githubusercontent.com/sql-machine-learning/elasticdl/c7f678e2617d1e0f20683a84275558e5adf8f452/scripts/validate_job_status.sh | bash /dev/stdin odps

base_image/Dockerfile

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ RUN curl --silent https://dl.google.com/go/go1.13.4.linux-amd64.tar.gz | tar -C
1111
# install ElasticDL to manage ElasticDL jobs
1212
RUN git clone https://github.com/sql-machine-learning/elasticdl.git && \
1313
cd elasticdl && \
14-
git checkout eb93e2a48e6fe8f077c4937d8c0c5987faa9cf56 && \
14+
git checkout 2efbe5bfcc94cad9ca1838da8aabe639e21c05d1 && \
1515
pip install -r elasticdl/requirements.txt && \
16-
python setup.py install
16+
python setup.py install && \
17+
cd .. && rm -rf elasticdl

scripts/data/iris.recordio

16.4 KB
Binary file not shown.

scripts/test_elasticdl_submit.sh

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,20 @@
1+
#!/bin/bash
2+
3+
elasticdl train --image_base=sqlflow/sqlflow_models \
4+
--model_def=dnnclassifier.DNNClassifier \
5+
--training_data=sqlflow_test_iris_train \
6+
--data_reader_params='columns=["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]' \
7+
--envs="ODPS_PROJECT_NAME=gomaxcompute_driver_w7u,ODPS_ACCESS_ID=$ODPS_ACCESS_ID,ODPS_ACCESS_KEY=$ODPS_ACCESS_KEY" \
8+
--minibatch_size=32 \
9+
--num_epochs=2 \
10+
--model_zoo=/sqlflow_models \
11+
--job_name=test-odps \
12+
--num_minibatches_per_task=2 \
13+
--image_pull_policy=Never \
14+
--num_workers=2 \
15+
--master_resource_request="cpu=400m,memory=256Mi" \
16+
--master_resource_limit="cpu=1,memory=2048Mi" \
17+
--worker_resource_request="cpu=400m,memory=256Mi" \
18+
--worker_resource_limit="cpu=1,memory=3072Mi" \
19+
--grads_to_wait=2 \
20+
--output=model_output

sqlflow_models/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
FROM sqlflow/modelzoo_base
22

3-
RUN pip install tensorflow==2.0.0b1 scikit-learn==0.20.0 numpy==1.16.2 pandas==0.25.1
3+
RUN pip install tensorflow==2.0.0 scikit-learn==0.20.0 numpy==1.16.2 pandas==0.25.1
44
ADD *.py /sqlflow_models/

sqlflow_models/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,4 +2,4 @@
22
from .dnnclassifier import DNNClassifier
33
from .lstmclassifier import StackedBiLSTMClassifier
44
from .deep_embedding_cluster import DeepEmbeddingClusterModel
5-
from . import dnnclassifier_functional_api_example
5+
from .dnnclassifier_functional_api_example import dnnclassifier_functional_model

sqlflow_models/deep_embedding_cluster.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -269,10 +269,14 @@ def optimizer():
269269
global _train_lr
270270
return SGD(lr=_train_lr, momentum=0.9)
271271

272-
def loss():
272+
def loss(output, labels):
273273
global _default_loss
274274
return _default_loss
275275

276+
# FIXME(typhoonzero): use the name loss once ElasticDL has updated.
277+
def loss_new(y_true, y_pred):
278+
return _default_loss(y_true, y_pred)
279+
276280
def prepare_prediction_column(prediction):
277281
""" Return the cluster label of the highest probability. """
278282
return prediction.argmax(axis=-1)

sqlflow_models/dnnclassifier.py

Lines changed: 77 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import tensorflow as tf
22

33
class DNNClassifier(tf.keras.Model):
4-
def __init__(self, feature_columns, hidden_units=[10,10], n_classes=2):
4+
def __init__(self, feature_columns=None, hidden_units=[10,10], n_classes=3):
55
"""DNNClassifier
66
:param feature_columns: feature columns.
77
:type feature_columns: list[tf.feature_column].
@@ -11,16 +11,20 @@ def __init__(self, feature_columns, hidden_units=[10,10], n_classes=2):
1111
:type n_classes: int.
1212
"""
1313
super(DNNClassifier, self).__init__()
14-
15-
# combines all the data as a dense tensor
16-
self.feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
14+
self.feature_layer = None
15+
if feature_columns is not None:
16+
# combines all the data as a dense tensor
17+
self.feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
1718
self.hidden_layers = []
1819
for hidden_unit in hidden_units:
1920
self.hidden_layers.append(tf.keras.layers.Dense(hidden_unit))
2021
self.prediction_layer = tf.keras.layers.Dense(n_classes, activation='softmax')
2122

22-
def call(self, inputs):
23-
x = self.feature_layer(inputs)
23+
def call(self, inputs, training=True):
24+
if self.feature_layer is not None:
25+
x = self.feature_layer(inputs)
26+
else:
27+
x = tf.keras.layers.Flatten()(inputs)
2428
for hidden_layer in self.hidden_layers:
2529
x = hidden_layer(x)
2630
return self.prediction_layer(x)
@@ -29,10 +33,74 @@ def optimizer(learning_rate=0.1):
2933
"""Default optimizer name. Used in model.compile."""
3034
return tf.keras.optimizers.Adagrad(lr=learning_rate)
3135

32-
def loss():
36+
def loss(output, labels):
3337
"""Default loss function. Used in model.compile."""
34-
return 'sparse_categorical_crossentropy'
38+
# return 'sparse_categorical_crossentropy'
39+
return tf.reduce_mean(
40+
tf.keras.losses.sparse_categorical_crossentropy(labels, output))
41+
42+
# FIXME(typhoonzero): use the name loss once ElasticDL has updated.
43+
def loss_new(y_true, y_pred):
44+
return tf.reduce_mean(
45+
tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred))
3546

3647
def prepare_prediction_column(prediction):
3748
"""Return the class label of highest probability."""
38-
return prediction.argmax(axis=-1)
49+
return prediction.argmax(axis=-1)
50+
51+
def eval_metrics_fn():
52+
return {
53+
"accuracy": lambda labels, predictions: tf.equal(
54+
tf.argmax(predictions, 1, output_type=tf.int32),
55+
tf.cast(tf.reshape(labels, [-1]), tf.int32),
56+
)
57+
}
58+
59+
# dataset_fn is only used to test using this model in ElasticDL.
60+
# TODO(typhoonzero): remove dataset_fn once https://github.com/sql-machine-learning/elasticdl/issues/1482 is done.
61+
def dataset_fn(dataset, mode, metadata):
62+
from elasticdl.python.common.constants import Mode
63+
def _parse_data(record):
64+
label_col_name = "class"
65+
record = tf.strings.to_number(record, tf.float32)
66+
67+
def _get_features_without_labels(
68+
record, label_col_ind, features_shape
69+
):
70+
features = [
71+
record[:label_col_ind],
72+
record[label_col_ind + 1 :], # noqa: E203
73+
]
74+
features = tf.concat(features, -1)
75+
return tf.reshape(features, features_shape)
76+
77+
features_shape = (4, 1)
78+
labels_shape = (1,)
79+
if mode != Mode.PREDICTION:
80+
if label_col_name not in metadata.column_names:
81+
raise ValueError(
82+
"Missing the label column '%s' in the retrieved "
83+
"ODPS table." % label_col_name
84+
)
85+
label_col_ind = metadata.column_names.index(label_col_name)
86+
labels = tf.reshape(record[label_col_ind], labels_shape)
87+
return (
88+
_get_features_without_labels(
89+
record, label_col_ind, features_shape
90+
),
91+
labels,
92+
)
93+
else:
94+
if label_col_name in metadata.column_names:
95+
label_col_ind = metadata.column_names.index(label_col_name)
96+
return _get_features_without_labels(
97+
record, label_col_ind, features_shape
98+
)
99+
else:
100+
return tf.reshape(record, features_shape)
101+
102+
dataset = dataset.map(_parse_data)
103+
104+
if mode == Mode.TRAINING:
105+
dataset = dataset.shuffle(buffer_size=200)
106+
return dataset

sqlflow_models/dnnclassifier_functional_api_example.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import tensorflow as tf
22

3-
def get_model(feature_columns, field_metas, learning_rate=0.01):
3+
def dnnclassifier_functional_model(feature_columns, field_metas, learning_rate=0.01):
44
feature_layer_inputs = dict()
55
for fmkey in field_metas:
66
fm = field_metas[fmkey]
@@ -13,8 +13,12 @@ def get_model(feature_columns, field_metas, learning_rate=0.01):
1313
pred = tf.keras.layers.Dense(1, activation='sigmoid')(x)
1414
return tf.keras.Model(inputs=[v for v in feature_layer_inputs.values()], outputs=pred)
1515

16-
def loss():
17-
return 'binary_crossentropy'
16+
def loss(output, labels):
17+
return tf.reduce_mean(tf.keras.losses.binary_crossentropy(labels, output))
18+
19+
# FIXME(typhoonzero): use the name loss once ElasticDL has updated.
20+
def loss_new(y_true, y_pred):
21+
return tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true, y_pred))
1822

1923
def epochs():
2024
return 1

sqlflow_models/lstmclassifier.py

Lines changed: 29 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
_loss = ''
44

55
class StackedBiLSTMClassifier(tf.keras.Model):
6-
def __init__(self, feature_columns, stack_units=[32], hidden_size=64, n_classes=2):
6+
def __init__(self, feature_columns=None, stack_units=[32], hidden_size=64, n_classes=2):
77
"""StackedBiLSTMClassifier
88
:param feature_columns: All columns must be embedding of sequence column with same sequence_length.
99
:type feature_columns: list[tf.embedding_column].
@@ -15,7 +15,9 @@ def __init__(self, feature_columns, stack_units=[32], hidden_size=64, n_classes=
1515
global _loss
1616
super(StackedBiLSTMClassifier, self).__init__()
1717

18-
self.feature_layer = tf.keras.experimental.SequenceFeatures(feature_columns)
18+
self.feature_layer = None
19+
if feature_columns is not None:
20+
self.feature_layer = tf.keras.experimental.SequenceFeatures(feature_columns)
1921
self.stack_bilstm = []
2022
self.stack_size = len(stack_units)
2123
self.stack_units = stack_units
@@ -37,7 +39,10 @@ def __init__(self, feature_columns, stack_units=[32], hidden_size=64, n_classes=
3739
self.pred = tf.keras.layers.Dense(n_classes, activation=pred_act)
3840

3941
def call(self, inputs):
40-
x, seq_len = self.feature_layer(inputs)
42+
if self.feature_layer:
43+
x, seq_len = self.feature_layer(inputs)
44+
else:
45+
x, seq_len = inputs
4146
seq_mask = tf.sequence_mask(seq_len)
4247
if self.stack_size > 1:
4348
for i in range(self.stack_size - 1):
@@ -50,10 +55,29 @@ def optimizer():
5055
"""Default optimizer name. Used in model.compile."""
5156
return 'adam'
5257

53-
def loss():
58+
def loss(output, labels):
59+
global _loss
60+
if _loss == "binary_crossentropy":
61+
return tf.reduce_mean(tf.keras.losses.binary_crossentropy(labels, output))
62+
elif _loss == "categorical_crossentropy":
63+
return tf.reduce_mean(tf.keras.losses.categorical_crossentropy(labels, output))
64+
65+
# FIXME(typhoonzero): use the name loss once ElasticDL has updated.
66+
def loss_new(y_true, y_pred):
5467
global _loss
55-
return _loss
68+
if _loss == "binary_crossentropy":
69+
return tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true, y_pred))
70+
elif _loss == "categorical_crossentropy":
71+
return tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_true, y_pred))
5672

5773
def prepare_prediction_column(prediction):
5874
"""Return the class label of highest probability."""
5975
return prediction.argmax(axis=-1)
76+
77+
def eval_metrics_fn():
78+
return {
79+
"accuracy": lambda labels, predictions: tf.equal(
80+
tf.argmax(predictions, 1, output_type=tf.int32),
81+
tf.cast(tf.reshape(labels, [-1]), tf.int32),
82+
)
83+
}

0 commit comments

Comments
 (0)