diff --git a/TensorFlow/Segmentation/VNet/Dockerfile b/TensorFlow/Segmentation/VNet/Dockerfile index 36beca0ef..949dbdac5 100644 --- a/TensorFlow/Segmentation/VNet/Dockerfile +++ b/TensorFlow/Segmentation/VNet/Dockerfile @@ -4,7 +4,6 @@ ADD . /workspace/vnet WORKDIR /workspace/vnet RUN pip install --upgrade pip -RUN pip install --user git+https://github.com/NVIDIA/dllogger RUN pip install --disable-pip-version-check -r requirements.txt diff --git a/TensorFlow/Segmentation/VNet/hooks/profiling_hook.py b/TensorFlow/Segmentation/VNet/hooks/profiling_hook.py index dd950eda7..4a230976c 100644 --- a/TensorFlow/Segmentation/VNet/hooks/profiling_hook.py +++ b/TensorFlow/Segmentation/VNet/hooks/profiling_hook.py @@ -36,6 +36,6 @@ def before_run(self, run_context): def end(self, session): deltas = [self._timestamps[i + 1] - self._timestamps[i] for i in range(len(self._timestamps) - 1)] - self._logger.log(step=self._step, data={ - 'average_throughput_' + 'train' if self._training else 'test': self._global_batch_size / np.mean(deltas)}) + self._logger.log(step=(), data={ + 'average_throughput_train' if self._training else 'average_throughput_test': self._global_batch_size / np.mean(deltas)}) self._logger.flush() diff --git a/TensorFlow/Segmentation/VNet/hooks/train_hook.py b/TensorFlow/Segmentation/VNet/hooks/train_hook.py index b1fa9fc1b..29ef634c4 100644 --- a/TensorFlow/Segmentation/VNet/hooks/train_hook.py +++ b/TensorFlow/Segmentation/VNet/hooks/train_hook.py @@ -36,7 +36,7 @@ def after_run(self, run_context, run_values): if self._step % self._log_every == 0: - self._logger.log(step=self._step, data={'total_loss': run_values.results[0]}) + self._logger.log(step=(self._step,), data={'total_loss': str(run_values.results[0])}) self._step += 1 def end(self, session): diff --git a/TensorFlow/Segmentation/VNet/main.py b/TensorFlow/Segmentation/VNet/main.py index 2ebe094f8..e8065d571 100644 --- a/TensorFlow/Segmentation/VNet/main.py +++ b/TensorFlow/Segmentation/VNet/main.py @@ 
-87,7 +87,7 @@ def main(_): run_config = tf.estimator.RunConfig( save_summary_steps=None, - save_checkpoints_steps=dataset.train_steps * FLAGS.train_epochs, + save_checkpoints_steps=None if FLAGS.benchmark else dataset.train_steps * FLAGS.train_epochs, save_checkpoints_secs=None, tf_random_seed=None, session_config=config, @@ -112,22 +112,32 @@ def main(_): if hvd.rank() == 0: train_hooks += [TrainHook(FLAGS.log_every, DLLogger)] + DLLogger.log(step=tuple(), data={"training": "START"}) + estimator.train( input_fn=lambda: dataset.train_fn(FLAGS.augment), steps=steps, hooks=train_hooks) + DLLogger.log(step=tuple(), data={"training": "FINISHED"}) + if 'evaluate' in FLAGS.exec_mode: if hvd.rank() == 0: if FLAGS.train_split >= 1.0: raise ValueError("Missing argument: --train_split < 1.0") + + DLLogger.log(step=tuple(), data={"evaluating": "START"}) + result = estimator.evaluate( input_fn=dataset.eval_fn, steps=dataset.eval_steps, hooks=[]) - DLLogger.log(step=tuple(), data={'background_dice': result['background dice']}) - DLLogger.log(step=tuple(), data={'anterior_dice': result['Anterior dice']}) - DLLogger.log(step=tuple(), data={'posterior_dice': result['Posterior dice']}) + + DLLogger.log(step=tuple(), data={"evaluating": "FINISH"}) + + DLLogger.log(step=tuple(), data={'background_dice': str(result['background dice'])}) + DLLogger.log(step=tuple(), data={'anterior_dice': str(result['Anterior dice'])}) + DLLogger.log(step=tuple(), data={'posterior_dice': str(result['Posterior dice'])}) if 'predict' in FLAGS.exec_mode: count = 1 diff --git a/TensorFlow/Segmentation/VNet/requirements.txt b/TensorFlow/Segmentation/VNet/requirements.txt index f00d77402..a3ba7512d 100644 --- a/TensorFlow/Segmentation/VNet/requirements.txt +++ b/TensorFlow/Segmentation/VNet/requirements.txt @@ -2,3 +2,4 @@ SimpleITK==1.1.0 requests googledrivedownloader tf2onnx +git+https://github.com/NVIDIA/dllogger#egg=dllogger \ No newline at end of file