[ModelZoo] Test model works with sqlflow and edl (#27)

typhoonzero · terrytangyuan · commit 9f8bea5509ee · 2019-11-21T21:13:04.000-05:00
* add models base image

* wip

* wip

* test ci

* test ci

* test ci

* test ci

* test ci

* test ci

* test ci

* test ci

* test edl ci

* fix

* test ci

* update

* compatible with ci

* update

* secure keys

* just test

* test ci

* test ci

* update

* test elasticdl ci

* test run edl

* update

* secure keys

* update
diff --git a/.travis.yml b/.travis.yml
@@ -4,17 +4,32 @@ branches:
   only:
   - master
   - develop
-  # https://docs.travis-ci.com/user/customizing-the-build/#safelisting-or-blocklisting-branches
-  # safe list can prevent tag building, add rexp to detect tag
   - "/^v\\d+\\.\\d+(\\.\\d+)?(-\\S*)?$/"
-
 language: python
 python:
 - 3.6
 - 3.7
-
+services:
+- docker
 install:
 - python setup.py -q install
-
 script:
 - python setup.py -q test
+
+jobs:
+  include:
+  - stage: ElasticDLTest
+    script:
+    - cd base_image && docker build -t sqlflow/modelzoo_base . && cd ..
+    - cd sqlflow_models && docker build -t sqlflow/sqlflow_models . && cd ..
+    - curl -s https://raw.githubusercontent.com/sql-machine-learning/elasticdl/4a995fe7eaf91bc5a9d50181e9aaaa14d15c8a09/scripts/setup_k8s_env.sh | bash
+    - kubectl apply -f https://raw.githubusercontent.com/sql-machine-learning/elasticdl/develop/elasticdl/manifests/examples/elasticdl-rbac.yaml
+    - docker run --rm -it --net=host
+      -v $HOME/.kube:/root/.kube
+      -v /home/$USER/.minikube/:/home/$USER/.minikube/
+      -v /var/run/docker.sock:/var/run/docker.sock
+      -v $PWD:/workspace
+      -e ODPS_ACCESS_ID=$MAXCOMPUTE_AK
+      -e ODPS_ACCESS_KEY=$MAXCOMPUTE_SK
+      sqlflow/sqlflow_models bash /workspace/scripts/test_elasticdl_submit.sh
+    - curl -s https://raw.githubusercontent.com/sql-machine-learning/elasticdl/c7f678e2617d1e0f20683a84275558e5adf8f452/scripts/validate_job_status.sh | bash /dev/stdin odps
diff --git a/base_image/Dockerfile b/base_image/Dockerfile
@@ -11,6 +11,7 @@ RUN curl --silent https://dl.google.com/go/go1.13.4.linux-amd64.tar.gz | tar -C
 # install ElasticDL to manage ElasticDL jobs
 RUN git clone https://github.com/sql-machine-learning/elasticdl.git && \
 cd elasticdl && \
-git checkout eb93e2a48e6fe8f077c4937d8c0c5987faa9cf56 && \
+git checkout 2efbe5bfcc94cad9ca1838da8aabe639e21c05d1 && \
 pip install -r elasticdl/requirements.txt && \
-python setup.py install
+python setup.py install && \
+cd .. && rm -rf elasticdl
diff --git a/scripts/data/iris.recordio b/scripts/data/iris.recordio
diff --git a/scripts/test_elasticdl_submit.sh b/scripts/test_elasticdl_submit.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+elasticdl train --image_base=sqlflow/sqlflow_models \
+--model_def=dnnclassifier.DNNClassifier \
+--training_data=sqlflow_test_iris_train \
+--data_reader_params='columns=["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]' \
+--envs="ODPS_PROJECT_NAME=gomaxcompute_driver_w7u,ODPS_ACCESS_ID=$ODPS_ACCESS_ID,ODPS_ACCESS_KEY=$ODPS_ACCESS_KEY" \
+--minibatch_size=32 \
+--num_epochs=2 \
+--model_zoo=/sqlflow_models \
+--job_name=test-odps \
+--num_minibatches_per_task=2 \
+--image_pull_policy=Never \
+--num_workers=2 \
+--master_resource_request="cpu=400m,memory=256Mi" \
+--master_resource_limit="cpu=1,memory=2048Mi" \
+--worker_resource_request="cpu=400m,memory=256Mi" \
+--worker_resource_limit="cpu=1,memory=3072Mi" \
+--grads_to_wait=2 \
+--output=model_output
diff --git a/sqlflow_models/Dockerfile b/sqlflow_models/Dockerfile
@@ -1,4 +1,4 @@
 FROM sqlflow/modelzoo_base
 
-RUN pip install tensorflow==2.0.0b1 scikit-learn==0.20.0 numpy==1.16.2 pandas==0.25.1
+RUN pip install tensorflow==2.0.0 scikit-learn==0.20.0 numpy==1.16.2 pandas==0.25.1
 ADD *.py /sqlflow_models/
diff --git a/sqlflow_models/__init__.py b/sqlflow_models/__init__.py
@@ -2,4 +2,4 @@
 from .dnnclassifier import DNNClassifier
 from .lstmclassifier import StackedBiLSTMClassifier
 from .deep_embedding_cluster import DeepEmbeddingClusterModel
-from . import dnnclassifier_functional_api_example
+from .dnnclassifier_functional_api_example import dnnclassifier_functional_model
diff --git a/sqlflow_models/deep_embedding_cluster.py b/sqlflow_models/deep_embedding_cluster.py
@@ -269,10 +269,14 @@ def optimizer():
     global _train_lr
     return SGD(lr=_train_lr, momentum=0.9)
 
-def loss():
+def loss(output, labels):
     global _default_loss
     return _default_loss
 
+# FIXME(typhoonzero): use the name loss once ElasticDL has updated.
+def loss_new(y_true, y_pred):
+    return _default_loss(y_true, y_pred)
+
 def prepare_prediction_column(prediction):
     """ Return the cluster label of the highest probability. """
     return prediction.argmax(axis=-1)
diff --git a/sqlflow_models/dnnclassifier.py b/sqlflow_models/dnnclassifier.py
@@ -1,7 +1,7 @@
 import tensorflow as tf
 
 class DNNClassifier(tf.keras.Model):
-    def __init__(self, feature_columns, hidden_units=[10,10], n_classes=2):
+    def __init__(self, feature_columns=None, hidden_units=[10,10], n_classes=3):
         """DNNClassifier
         :param feature_columns: feature columns.
         :type feature_columns: list[tf.feature_column].
@@ -11,16 +11,20 @@ def __init__(self, feature_columns, hidden_units=[10,10], n_classes=2):
         :type n_classes: int.
         """
         super(DNNClassifier, self).__init__()
-
-        # combines all the data as a dense tensor
-        self.feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
+        self.feature_layer = None
+        if feature_columns is not None:
+            # combines all the data as a dense tensor
+            self.feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
         self.hidden_layers = []
         for hidden_unit in hidden_units:
             self.hidden_layers.append(tf.keras.layers.Dense(hidden_unit))
         self.prediction_layer = tf.keras.layers.Dense(n_classes, activation='softmax')
 
-    def call(self, inputs):
-        x = self.feature_layer(inputs)
+    def call(self, inputs, training=True):
+        if self.feature_layer is not None:
+            x = self.feature_layer(inputs)
+        else:
+            x = tf.keras.layers.Flatten()(inputs)
         for hidden_layer in self.hidden_layers:
             x = hidden_layer(x)
         return self.prediction_layer(x)
@@ -29,10 +33,74 @@ def optimizer(learning_rate=0.1):
     """Default optimizer name. Used in model.compile."""
     return tf.keras.optimizers.Adagrad(lr=learning_rate)
 
-def loss():
+def loss(output, labels):
     """Default loss function. Used in model.compile."""
-    return 'sparse_categorical_crossentropy'
+    # return 'sparse_categorical_crossentropy'
+    return tf.reduce_mean(
+        tf.keras.losses.sparse_categorical_crossentropy(labels, output))
+
+# FIXME(typhoonzero): use the name loss once ElasticDL has updated.
+def loss_new(y_true, y_pred):
+    return tf.reduce_mean(
+        tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred))
 
 def prepare_prediction_column(prediction):
     """Return the class label of highest probability."""
-    return prediction.argmax(axis=-1)
+    return prediction.argmax(axis=-1)
+
+def eval_metrics_fn():
+    return {
+        "accuracy": lambda labels, predictions: tf.equal(
+            tf.argmax(predictions, 1, output_type=tf.int32),
+            tf.cast(tf.reshape(labels, [-1]), tf.int32),
+        )
+    }
+
+# dataset_fn is only used to test using this model in ElasticDL.
+# TODO(typhoonzero): remove dataset_fn once https://github.com/sql-machine-learning/elasticdl/issues/1482 is done.
+def dataset_fn(dataset, mode, metadata):
+    from elasticdl.python.common.constants import Mode
+    def _parse_data(record):
+        label_col_name = "class"
+        record = tf.strings.to_number(record, tf.float32)
+
+        def _get_features_without_labels(
+            record, label_col_ind, features_shape
+        ):
+            features = [
+                record[:label_col_ind],
+                record[label_col_ind + 1 :],  # noqa: E203
+            ]
+            features = tf.concat(features, -1)
+            return tf.reshape(features, features_shape)
+
+        features_shape = (4, 1)
+        labels_shape = (1,)
+        if mode != Mode.PREDICTION:
+            if label_col_name not in metadata.column_names:
+                raise ValueError(
+                    "Missing the label column '%s' in the retrieved "
+                    "ODPS table." % label_col_name
+                )
+            label_col_ind = metadata.column_names.index(label_col_name)
+            labels = tf.reshape(record[label_col_ind], labels_shape)
+            return (
+                _get_features_without_labels(
+                    record, label_col_ind, features_shape
+                ),
+                labels,
+            )
+        else:
+            if label_col_name in metadata.column_names:
+                label_col_ind = metadata.column_names.index(label_col_name)
+                return _get_features_without_labels(
+                    record, label_col_ind, features_shape
+                )
+            else:
+                return tf.reshape(record, features_shape)
+
+    dataset = dataset.map(_parse_data)
+
+    if mode == Mode.TRAINING:
+        dataset = dataset.shuffle(buffer_size=200)
+    return dataset
diff --git a/sqlflow_models/dnnclassifier_functional_api_example.py b/sqlflow_models/dnnclassifier_functional_api_example.py
@@ -1,6 +1,6 @@
 import tensorflow as tf
 
-def get_model(feature_columns, field_metas, learning_rate=0.01):
+def dnnclassifier_functional_model(feature_columns, field_metas, learning_rate=0.01):
     feature_layer_inputs = dict()
     for fmkey in field_metas:
         fm = field_metas[fmkey]
@@ -13,8 +13,12 @@ def get_model(feature_columns, field_metas, learning_rate=0.01):
     pred = tf.keras.layers.Dense(1, activation='sigmoid')(x)
     return tf.keras.Model(inputs=[v for v in feature_layer_inputs.values()], outputs=pred)
 
-def loss():
-    return 'binary_crossentropy'
+def loss(output, labels):
+    return tf.reduce_mean(tf.keras.losses.binary_crossentropy(labels, output))
+
+# FIXME(typhoonzero): use the name loss once ElasticDL has updated.
+def loss_new(y_true, y_pred):
+    return tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true, y_pred))
 
 def epochs():
     return 1
diff --git a/sqlflow_models/lstmclassifier.py b/sqlflow_models/lstmclassifier.py
@@ -3,7 +3,7 @@
 _loss = ''
 
 class StackedBiLSTMClassifier(tf.keras.Model):
-    def __init__(self, feature_columns, stack_units=[32], hidden_size=64, n_classes=2):
+    def __init__(self, feature_columns=None, stack_units=[32], hidden_size=64, n_classes=2):
         """StackedBiLSTMClassifier
         :param feature_columns: All columns must be embedding of sequence column with same sequence_length.
         :type feature_columns: list[tf.embedding_column].
@@ -15,7 +15,9 @@ def __init__(self, feature_columns, stack_units=[32], hidden_size=64, n_classes=
         global _loss
         super(StackedBiLSTMClassifier, self).__init__()
 
-        self.feature_layer = tf.keras.experimental.SequenceFeatures(feature_columns)
+        self.feature_layer = None
+        if feature_columns is not None:
+            self.feature_layer = tf.keras.experimental.SequenceFeatures(feature_columns)
         self.stack_bilstm = []
         self.stack_size = len(stack_units)
         self.stack_units = stack_units
@@ -37,7 +39,10 @@ def __init__(self, feature_columns, stack_units=[32], hidden_size=64, n_classes=
         self.pred = tf.keras.layers.Dense(n_classes, activation=pred_act)
 
     def call(self, inputs):
-        x, seq_len = self.feature_layer(inputs)
+        if self.feature_layer:
+            x, seq_len = self.feature_layer(inputs)
+        else:
+            x, seq_len = inputs
         seq_mask = tf.sequence_mask(seq_len)
         if self.stack_size > 1:
             for i in range(self.stack_size - 1):
@@ -50,10 +55,29 @@ def optimizer():
     """Default optimizer name. Used in model.compile."""
     return 'adam'
 
-def loss():
+def loss(output, labels):
+    global _loss
+    if _loss == "binary_crossentropy":
+        return tf.reduce_mean(tf.keras.losses.binary_crossentropy(labels, output))
+    elif _loss == "categorical_crossentropy":
+        return tf.reduce_mean(tf.keras.losses.categorical_crossentropy(labels, output))
+
+# FIXME(typhoonzero): use the name loss once ElasticDL has updated.
+def loss_new(y_true, y_pred):
     global _loss
-    return _loss
+    if _loss == "binary_crossentropy":
+        return tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true, y_pred))
+    elif _loss == "categorical_crossentropy":
+        return tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_true, y_pred))
 
 def prepare_prediction_column(prediction):
     """Return the class label of highest probability."""
     return prediction.argmax(axis=-1)
+
+def eval_metrics_fn():
+    return {
+        "accuracy": lambda labels, predictions: tf.equal(
+            tf.argmax(predictions, 1, output_type=tf.int32),
+            tf.cast(tf.reshape(labels, [-1]), tf.int32),
+        )
+    }
diff --git a/tests/base.py b/tests/base.py
@@ -22,9 +22,9 @@ def setUp(self):
 
         def test_train_and_predict(self):
             self.setUp()
-            model_pkg = sys.modules[self.model.__module__]
+            model_pkg = sys.modules[self.model_class.__module__]
             self.model.compile(optimizer=model_pkg.optimizer(),
-                loss=model_pkg.loss(),
+                loss=model_pkg.loss_new,
                 metrics=["accuracy"])
             self.model.fit(train_input_fn(self.features, self.label),
                 epochs=1,
diff --git a/tests/test_deep_embedding_cluster.py b/tests/test_deep_embedding_cluster.py
@@ -66,8 +66,6 @@ def setUp(self):
         for feature_name, feature_values in zip(feature_column_names, list(x.T)):
             self.features[feature_name] = feature_values
 
-        # print(self.features)
-
         self.label = y
         feature_columns = [tf.feature_column.numeric_column(key) for key in self.features]
         pretrain_dims = [500, 500, 2000, 10]
@@ -87,12 +85,13 @@ def setUp(self):
                                                               update_interval=20,
                                                               tol=0.001,
                                                               loss=kld)
+        self.model_class = sqlflow_models.DeepEmbeddingClusterModel
 
     def test_train_and_predict(self):
         self.setUp()
-        model_pkg = sys.modules[self.model.__module__]
+        model_pkg = sys.modules[self.model_class.__module__]
         self.model.compile(optimizer=model_pkg.optimizer(),
-                           loss=model_pkg.loss())
+                           loss=model_pkg.loss_new)
         self.model.sqlflow_train_loop(train_input_fn(self.features, self.label))
         metric = evaluate(x=eval_input_fn(self.features, self.label), y=self.label, model=self.model)
         print(metric)
diff --git a/tests/test_dnnclassifier.py b/tests/test_dnnclassifier.py
@@ -14,6 +14,7 @@ def setUp(self):
         self.label = [0 for _ in range(50)] + [1 for _ in range(50)]
         feature_columns = [tf.feature_column.numeric_column(key) for key in
                            self.features]
+        self.model_class = sqlflow_models.DNNClassifier
         self.model = sqlflow_models.DNNClassifier(feature_columns=feature_columns)
 
 
diff --git a/tests/test_dnnclassifier_functional_api_example.py b/tests/test_dnnclassifier_functional_api_example.py
@@ -3,6 +3,7 @@
 
 import tensorflow as tf
 import unittest
+import sys
 
 
 def train_input_fn(features, labels, batch_size=32):
@@ -31,22 +32,9 @@ def setUp(self):
             "c3": {"name": "c3", "shape": [1], "dtype": tf.float32},
             "c4": {"name": "c4", "shape": [1], "dtype": tf.float32},
         }
-        self.module = sqlflow_models.dnnclassifier_functional_api_example
-        self.model = sqlflow_models.dnnclassifier_functional_api_example.get_model(feature_columns=feature_columns, field_metas=fieldmetas)
+        self.model = sqlflow_models.dnnclassifier_functional_model(feature_columns=feature_columns, field_metas=fieldmetas)
+        self.model_class = sqlflow_models.dnnclassifier_functional_model
 
-    def test_train_and_predict(self):
-            self.setUp()
-
-            self.model.compile(optimizer=self.module.optimizer(),
-                loss=self.module.loss(),
-                metrics=["accuracy"])
-            self.model.fit(train_input_fn(self.features, self.label),
-                epochs=self.module.epochs(),
-                steps_per_epoch=100, verbose=0)
-            loss, acc = self.model.evaluate(eval_input_fn(self.features, self.label))
-            print(loss, acc)
-            assert(loss < 10)
-            assert(acc > 0.1)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/test_lstm.py b/tests/test_lstm.py
@@ -20,6 +20,7 @@ def setUp(self):
             dimension=32)
         feature_columns = [emb]
         self.model = sqlflow_models.StackedBiLSTMClassifier(feature_columns=feature_columns, stack_units=[64, 32])
+        self.model_class = sqlflow_models.DNNClassifier
 
 
 if __name__ == '__main__':