From 295089640e09a43f012db15fe15d49a3e625814a Mon Sep 17 00:00:00 2001 From: Pablo Ribalta Date: Tue, 3 Dec 2019 09:36:28 +0100 Subject: [PATCH] Cleanup Signed-off-by: Pablo Ribalta --- TensorFlow/Segmentation/VNet/Dockerfile | 4 +- .../Segmentation/VNet/dllogger/.gitignore | 125 -------------- TensorFlow/Segmentation/VNet/dllogger/LICENSE | 13 -- .../Segmentation/VNet/dllogger/README.md | 83 --------- .../Segmentation/VNet/dllogger/__init__.py | 0 .../VNet/dllogger/dllogger/__init__.py | 64 ------- .../VNet/dllogger/dllogger/logger.py | 161 ------------------ .../VNet/dllogger/examples/__init__.py | 0 .../dllogger/examples/dllogger_example.py | 68 -------- .../examples/dllogger_singleton_example.py | 68 -------- .../dllogger/examples/example_resnet_log.json | 52 ------ .../dllogger/examples/example_stdout_log.json | 49 ------ .../Segmentation/VNet/dllogger/setup.py | 40 ----- TensorFlow/Segmentation/VNet/main.py | 4 +- 14 files changed, 4 insertions(+), 727 deletions(-) delete mode 100644 TensorFlow/Segmentation/VNet/dllogger/.gitignore delete mode 100644 TensorFlow/Segmentation/VNet/dllogger/LICENSE delete mode 100644 TensorFlow/Segmentation/VNet/dllogger/README.md delete mode 100644 TensorFlow/Segmentation/VNet/dllogger/__init__.py delete mode 100644 TensorFlow/Segmentation/VNet/dllogger/dllogger/__init__.py delete mode 100644 TensorFlow/Segmentation/VNet/dllogger/dllogger/logger.py delete mode 100644 TensorFlow/Segmentation/VNet/dllogger/examples/__init__.py delete mode 100644 TensorFlow/Segmentation/VNet/dllogger/examples/dllogger_example.py delete mode 100644 TensorFlow/Segmentation/VNet/dllogger/examples/dllogger_singleton_example.py delete mode 100644 TensorFlow/Segmentation/VNet/dllogger/examples/example_resnet_log.json delete mode 100644 TensorFlow/Segmentation/VNet/dllogger/examples/example_stdout_log.json delete mode 100644 TensorFlow/Segmentation/VNet/dllogger/setup.py diff --git a/TensorFlow/Segmentation/VNet/Dockerfile b/TensorFlow/Segmentation/VNet/Dockerfile index 9aef2ab96..36beca0ef 100644 --- a/TensorFlow/Segmentation/VNet/Dockerfile +++ b/TensorFlow/Segmentation/VNet/Dockerfile @@ -1,10 +1,10 @@ -FROM gitlab-master.nvidia.com:5005/dl/dgx/tensorflow:19.11-tf1-py3-devel +FROM nvcr.io/nvidia/tensorflow:19.11-tf1-py3 ADD . /workspace/vnet WORKDIR /workspace/vnet RUN pip install --upgrade pip -RUN pip install ./dllogger +RUN pip install --user git+https://github.com/NVIDIA/dllogger RUN pip install --disable-pip-version-check -r requirements.txt diff --git a/TensorFlow/Segmentation/VNet/dllogger/.gitignore b/TensorFlow/Segmentation/VNet/dllogger/.gitignore deleted file mode 100644 index f71748641..000000000 --- a/TensorFlow/Segmentation/VNet/dllogger/.gitignore +++ /dev/null @@ -1,125 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ diff --git a/TensorFlow/Segmentation/VNet/dllogger/LICENSE b/TensorFlow/Segmentation/VNet/dllogger/LICENSE deleted file mode 100644 index bc93b176c..000000000 --- a/TensorFlow/Segmentation/VNet/dllogger/LICENSE +++ /dev/null @@ -1,13 +0,0 @@ -Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/TensorFlow/Segmentation/VNet/dllogger/README.md b/TensorFlow/Segmentation/VNet/dllogger/README.md deleted file mode 100644 index f3a742c23..000000000 --- a/TensorFlow/Segmentation/VNet/dllogger/README.md +++ /dev/null @@ -1,83 +0,0 @@ -# DLLogger - minimal logging tool - -This project emerged from the need for unified logging schema for Deep Learning Examples modules. It provides a simple, extensible and intuitive logging capabilities with API trimmed to absolute minimum. - -## Installation -```bash -pip install dllogger -``` - -## Quick Start - -To start using DLLogger you need to add just two lines of code and you are good to go! -```python -from dllogger import StdOutBackend, JSONStreamBackend, Verbosity -import dllogger as DLLogger - -DLLogger.init(backends=[ - StdOutBackend(Verbosity.DEFAULT), - JSONStreamBackend(Verbosity.VERBOSE, "tmp.json"), -]) -``` - -To log anything you need to call `DLLogger.log(step=, data=, verbosity=)` - -`` can be any number/string/tuple which would indicate where are we in the training process. -We propose a following convention: - - -- Use `step="PARAMETER"` for script parameters (everything that is needed to reproduce the result) - -- Use a tuple of numbers to indicate training progress, for example: - - - `step=tuple(epoch_number, iteration_number, validation_iteration_number)` for a `validation_iteration_number` in a validation that happens after iteration `iteration_number` of epoch `epoch_number` - - - `step=tuple(epoch_number,)` for a summary of epoch `epoch_number` - - - `step=tuple()` for a summary of whole training run - -`` should be a dictionary with metric names as keys and metric values as values. - -To log a metric metadata, for example unit, description, ordering, format call `DLLogger.metadata(metric_name, metric_metadata)` where metric metadata is a dictionary. Backends can use the metadata information -for logging purposes, for example StdOutBackend uses `format` and `unit` field to format its output. - -Log is automatically saved on exit of python process (with exception of processes killed with SIGKILL) but if you want to flush log file before training ends: -``` -DLLogger.flush() -``` - -Please refer to `examples/dllogger_example.py` and `examples/dllogger_singleton_example.py` files for example usage. - -## Available backends overview - -### StdOutBackend -Vanilla backend that holds no buffers. Just prints provided values to stdout. - -```python -StdOutBackend(verbosity, step_format=..., metric_format=...) -``` - --`step_format` is a function that formats step in DLLogger.log call - --`metric_format` is a function that formats a metric name and value given its metadata - -For details see the `default_*_format` functions in `dllogger/logger.py` - -Example output - see `examples/stdout.txt` - -### JSONStreamBackend -```python -JsonBackend(verbosity, file_name) -``` - -JSONStreamBackend is saving JSON lines into a file. Example output - see `examples/dummy_resnet_log.json` - -## Advanced usage - -### Multiple Loggers -It is possible to to obtain Logger instance without referencing to DLLogger global instance. -```python -from dllogger import Logger -logger = Logger(backends=BACKEND_LIST) -``` - diff --git a/TensorFlow/Segmentation/VNet/dllogger/__init__.py b/TensorFlow/Segmentation/VNet/dllogger/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/TensorFlow/Segmentation/VNet/dllogger/dllogger/__init__.py b/TensorFlow/Segmentation/VNet/dllogger/dllogger/__init__.py deleted file mode 100644 index 2576403b5..000000000 --- a/TensorFlow/Segmentation/VNet/dllogger/dllogger/__init__.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .logger import ( - Backend, - Verbosity, - Logger, - default_step_format, - default_metric_format, - StdOutBackend, - JSONStreamBackend, -) - -__version__ = "0.1.0" - - -class DLLoggerNotInitialized(Exception): - pass - - -class DLLLoggerAlreadyInitialized(Exception): - pass - - -class NotInitializedObject(object): - def __getattribute__(self, name): - raise DLLoggerNotInitialized( - "DLLogger not initialized. Initialize DLLogger with init(backends) function" - ) - - -GLOBAL_LOGGER = NotInitializedObject() - - -def log(step, data, verbosity=Verbosity.DEFAULT): - GLOBAL_LOGGER.log(step, data, verbosity=verbosity) - - -def metadata(metric, metadata): - GLOBAL_LOGGER.metadata(metric, metadata) - - -def flush(): - GLOBAL_LOGGER.flush() - - -def init(backends): - global GLOBAL_LOGGER - try: - if isinstance(GLOBAL_LOGGER, Logger): - raise DLLLoggerAlreadyInitialized() - except DLLoggerNotInitialized: - GLOBAL_LOGGER = Logger(backends) diff --git a/TensorFlow/Segmentation/VNet/dllogger/dllogger/logger.py b/TensorFlow/Segmentation/VNet/dllogger/dllogger/logger.py deleted file mode 100644 index fd75b67a1..000000000 --- a/TensorFlow/Segmentation/VNet/dllogger/dllogger/logger.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from abc import ABC, abstractmethod -from collections import defaultdict -from datetime import datetime -import json -import atexit - - -class Backend(ABC): - def __init__(self, verbosity): - self._verbosity = verbosity - - @property - def verbosity(self): - return self._verbosity - - @abstractmethod - def log(self, timestamp, elapsedtime, step, data): - pass - - @abstractmethod - def metadata(self, timestamp, elapsedtime, metric, metadata): - pass - - -class Verbosity: - OFF = -1 - DEFAULT = 0 - VERBOSE = 1 - - -class Logger: - def __init__(self, backends): - self.backends = backends - atexit.register(self.flush) - self.starttime = datetime.now() - - def metadata(self, metric, metadata): - timestamp = datetime.now() - elapsedtime = (timestamp - self.starttime).total_seconds() - for b in self.backends: - b.metadata(timestamp, elapsedtime, metric, metadata) - - def log(self, step, data, verbosity=1): - timestamp = datetime.now() - elapsedtime = (timestamp - self.starttime).total_seconds() - for b in self.backends: - if b.verbosity >= verbosity: - b.log(timestamp, elapsedtime, step, data) - - def flush(self): - for b in self.backends: - b.flush() - - -def default_step_format(step): - return str(step) - - -def default_metric_format(metric, metadata, value): - unit = metadata["unit"] if "unit" in metadata.keys() else "" - format = "{" + metadata["format"] + "}" if "format" in metadata.keys() else "{}" - return "{} : {} {}".format( - metric, format.format(value) if value is not None else value, unit - ) - - -def default_prefix_format(timestamp): - return "DLL {} - ".format(timestamp) - - -class StdOutBackend(Backend): - def __init__( - self, - verbosity, - step_format=default_step_format, - metric_format=default_metric_format, - prefix_format=default_prefix_format, - ): - super().__init__(verbosity=verbosity) - - self._metadata = defaultdict(dict) - self.step_format = step_format - self.metric_format = metric_format - self.prefix_format = prefix_format - - def metadata(self, timestamp, elapsedtime, metric, metadata): - self._metadata[metric].update(metadata) - - def log(self, timestamp, elapsedtime, step, data): - print( - "{}{} {}".format( - self.prefix_format(timestamp), - self.step_format(step), - " ".join( - [ - self.metric_format(m, self._metadata[m], v) - for m, v in data.items() - ] - ), - ) - ) - - def flush(self): - pass - - -class JSONStreamBackend(Backend): - def __init__(self, verbosity, filename): - super().__init__(verbosity=verbosity) - self._filename = filename - self.file = open(filename, "w") - atexit.register(self.file.close) - - def metadata(self, timestamp, elapsedtime, metric, metadata): - self.file.write( - "DLLL {}\n".format( - json.dumps( - dict( - timestamp=str(timestamp.timestamp()), - elapsedtime=str(elapsedtime), - datetime=str(timestamp), - type="METADATA", - metric=metric, - metadata=metadata, - ) - ) - ) - ) - - def log(self, timestamp, elapsedtime, step, data): - self.file.write( - "DLLL {}\n".format( - json.dumps( - dict( - timestamp=str(timestamp.timestamp()), - datetime=str(timestamp), - elapsedtime=str(elapsedtime), - type="LOG", - step=step, - data=data, - ) - ) - ) - ) - - def flush(self): - self.file.flush() diff --git a/TensorFlow/Segmentation/VNet/dllogger/examples/__init__.py b/TensorFlow/Segmentation/VNet/dllogger/examples/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/TensorFlow/Segmentation/VNet/dllogger/examples/dllogger_example.py b/TensorFlow/Segmentation/VNet/dllogger/examples/dllogger_example.py deleted file mode 100644 index b97a7aae7..000000000 --- a/TensorFlow/Segmentation/VNet/dllogger/examples/dllogger_example.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dllogger import Logger, StdOutBackend, JSONStreamBackend, Verbosity - - -def format_step(step): - if isinstance(step, str): - return step - s = "" - if len(step) > 0: - s += "Epoch: {} ".format(step[0]) - if len(step) > 1: - s += "Iteration: {} ".format(step[1]) - if len(step) > 2: - s += "Validation Iteration: {} ".format(step[2]) - return s - - -l = Logger( - [ - StdOutBackend(Verbosity.DEFAULT, step_format=format_step), - JSONStreamBackend(Verbosity.VERBOSE, "tmp.json"), - ] -) - -# You can log metrics in separate calls -l.log(step="PARAMETER", data={"HP1": 17}, verbosity=Verbosity.DEFAULT) -l.log(step="PARAMETER", data={"HP2": 23}, verbosity=Verbosity.DEFAULT) -# or together -l.log(step="PARAMETER", data={"HP3": 1, "HP4": 2}, verbosity=Verbosity.DEFAULT) - -l.metadata("loss", {"unit": "nat", "GOAL": "MINIMIZE", "STAGE": "TRAIN"}) -l.metadata("val.loss", {"unit": "nat", "GOAL": "MINIMIZE", "STAGE": "VAL"}) -l.metadata( - "speed", - {"unit": "speeds/s", "format": ":.3f", "GOAL": "MAXIMIZE", "STAGE": "TRAIN"}, -) - -for epoch in range(0, 2): - - for it in range(0, 10): - l.log( - step=(epoch, it), - data={"loss": 130 / (1 + epoch * 10 + it)}, - verbosity=Verbosity.DEFAULT, - ) - if it % 3 == 0: - for vit in range(0, 3): - l.log( - step=(epoch, it, vit), - data={"val.loss": 230 / (1 + epoch * 10 + it + vit)}, - verbosity=Verbosity.DEFAULT, - ) - - l.log(step=(epoch,), data={"speed": 10}, verbosity=Verbosity.DEFAULT) -l.flush() diff --git a/TensorFlow/Segmentation/VNet/dllogger/examples/dllogger_singleton_example.py b/TensorFlow/Segmentation/VNet/dllogger/examples/dllogger_singleton_example.py deleted file mode 100644 index b97a7aae7..000000000 --- a/TensorFlow/Segmentation/VNet/dllogger/examples/dllogger_singleton_example.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dllogger import Logger, StdOutBackend, JSONStreamBackend, Verbosity - - -def format_step(step): - if isinstance(step, str): - return step - s = "" - if len(step) > 0: - s += "Epoch: {} ".format(step[0]) - if len(step) > 1: - s += "Iteration: {} ".format(step[1]) - if len(step) > 2: - s += "Validation Iteration: {} ".format(step[2]) - return s - - -l = Logger( - [ - StdOutBackend(Verbosity.DEFAULT, step_format=format_step), - JSONStreamBackend(Verbosity.VERBOSE, "tmp.json"), - ] -) - -# You can log metrics in separate calls -l.log(step="PARAMETER", data={"HP1": 17}, verbosity=Verbosity.DEFAULT) -l.log(step="PARAMETER", data={"HP2": 23}, verbosity=Verbosity.DEFAULT) -# or together -l.log(step="PARAMETER", data={"HP3": 1, "HP4": 2}, verbosity=Verbosity.DEFAULT) - -l.metadata("loss", {"unit": "nat", "GOAL": "MINIMIZE", "STAGE": "TRAIN"}) -l.metadata("val.loss", {"unit": "nat", "GOAL": "MINIMIZE", "STAGE": "VAL"}) -l.metadata( - "speed", - {"unit": "speeds/s", "format": ":.3f", "GOAL": "MAXIMIZE", "STAGE": "TRAIN"}, -) - -for epoch in range(0, 2): - - for it in range(0, 10): - l.log( - step=(epoch, it), - data={"loss": 130 / (1 + epoch * 10 + it)}, - verbosity=Verbosity.DEFAULT, - ) - if it % 3 == 0: - for vit in range(0, 3): - l.log( - step=(epoch, it, vit), - data={"val.loss": 230 / (1 + epoch * 10 + it + vit)}, - verbosity=Verbosity.DEFAULT, - ) - - l.log(step=(epoch,), data={"speed": 10}, verbosity=Verbosity.DEFAULT) -l.flush() diff --git a/TensorFlow/Segmentation/VNet/dllogger/examples/example_resnet_log.json b/TensorFlow/Segmentation/VNet/dllogger/examples/example_resnet_log.json deleted file mode 100644 index 1bd681c2c..000000000 --- a/TensorFlow/Segmentation/VNet/dllogger/examples/example_resnet_log.json +++ /dev/null @@ -1,52 +0,0 @@ -DLLL {"timestamp": "1570716916.143421", "datetime": "2019-10-10 14:15:16.143421", "type": "LOG", "step": "PARAMETER", "data": {"data": "/imagenet", "data_backend": "dali-cpu", "arch": "resnet50", "model_config": "classic", "workers": 5, "epochs": 2, "batch_size": 128, "optimizer_batch_size": -1, "lr": 0.1, "lr_schedule": "step", "warmup": 0, "label_smoothing": 0.0, "mixup": 0.0, "momentum": 0.9, "weight_decay": 0.0001, "bn_weight_decay": false, "nesterov": false, "print_freq": 5, "resume": "", "pretrained_weights": "", "fp16": false, "static_loss_scale": 1, "dynamic_loss_scale": false, "prof": 25, "amp": true, "local_rank": 0, "seed": null, "gather_checkpoints": false, "raport_file": "experiment_raport.json", "evaluate": false, "training_only": false, "save_checkpoints": true, "workspace": "/results", "distributed": false, "gpu": 0, "world_size": 1}} -DLLL {"timestamp": "1570716916.14697", "datetime": "2019-10-10 14:15:16.146970", "type": "METADATA", "metric": "lr", "metadata": {}} -DLLL {"timestamp": "1570716916.152713", "datetime": "2019-10-10 14:15:16.152713", "type": "METADATA", "metric": "train.loss", "metadata": {"format": ":.5f", "type": "LOSS"}} -DLLL {"timestamp": "1570716916.152793", "datetime": "2019-10-10 14:15:16.152793", "type": "METADATA", "metric": "train.compute_ips", "metadata": {"unit": "img/s", "format": ":.2f", "type": "THROUGHPUT"}} -DLLL {"timestamp": "1570716916.15283", "datetime": "2019-10-10 14:15:16.152830", "type": "METADATA", "metric": "train.total_ips", "metadata": {"unit": "img/s", "format": ":.2f", "type": "THROUGHPUT"}} -DLLL {"timestamp": "1570716916.152859", "datetime": "2019-10-10 14:15:16.152859", "type": "METADATA", "metric": "train.data_time", "metadata": {"unit": "s", "format": ":.5f"}} -DLLL {"timestamp": "1570716916.152888", "datetime": "2019-10-10 14:15:16.152888", "type": "METADATA", "metric": "train.compute_time", "metadata": {"unit": "s", "format": ":.5f"}} -DLLL {"timestamp": "1570716921.842053", "datetime": "2019-10-10 14:15:21.842053", "type": "LOG", "step": [0, 5], "data": {"train.loss": 8.56640625, "train.total_ips": 545.2730241953889}} -DLLL {"timestamp": "1570716921.84219", "datetime": "2019-10-10 14:15:21.842190", "type": "LOG", "step": [0, 5], "data": {"lr": 0.1, "train.compute_ips": 547.1155618593889, "train.data_time": 0.0024609088897705076, "train.compute_time": 1.1351056098937988}} -DLLL {"timestamp": "1570716922.790879", "datetime": "2019-10-10 14:15:22.790879", "type": "LOG", "step": [0, 10], "data": {"train.loss": 8.996875, "train.total_ips": 674.7180797941068}} -DLLL {"timestamp": "1570716922.791004", "datetime": "2019-10-10 14:15:22.791004", "type": "LOG", "step": [0, 10], "data": {"lr": 0.1, "train.compute_ips": 676.7830689548455, "train.data_time": 0.0005788326263427735, "train.compute_time": 0.1891303539276123}} -DLLL {"timestamp": "1570716923.7396", "datetime": "2019-10-10 14:15:23.739600", "type": "LOG", "step": [0, 15], "data": {"train.loss": 8.921875, "train.total_ips": 674.8162169210029}} -DLLL {"timestamp": "1570716923.739728", "datetime": "2019-10-10 14:15:23.739728", "type": "LOG", "step": [0, 15], "data": {"lr": 0.1, "train.compute_ips": 676.7706087961133, "train.data_time": 0.00054779052734375, "train.compute_time": 0.1891340732574463}} -DLLL {"timestamp": "1570716924.688333", "datetime": "2019-10-10 14:15:24.688333", "type": "LOG", "step": [0, 20], "data": {"train.loss": 7.84453125, "train.total_ips": 674.787389360117}} -DLLL {"timestamp": "1570716924.688457", "datetime": "2019-10-10 14:15:24.688457", "type": "LOG", "step": [0, 20], "data": {"lr": 0.1, "train.compute_ips": 676.7296941210952, "train.data_time": 0.0005443572998046875, "train.compute_time": 0.18914542198181153}} -DLLL {"timestamp": "1570716925.636707", "datetime": "2019-10-10 14:15:25.636707", "type": "METADATA", "metric": "val.top1", "metadata": {"unit": "%", "format": ":.2f", "type": "ACCURACY"}} -DLLL {"timestamp": "1570716925.636772", "datetime": "2019-10-10 14:15:25.636772", "type": "METADATA", "metric": "val.top5", "metadata": {"unit": "%", "format": ":.2f", "type": "ACCURACY"}} -DLLL {"timestamp": "1570716925.636865", "datetime": "2019-10-10 14:15:25.636865", "type": "METADATA", "metric": "val.loss", "metadata": {"format": ":.5f", "type": "LOSS"}} -DLLL {"timestamp": "1570716925.636883", "datetime": "2019-10-10 14:15:25.636883", "type": "METADATA", "metric": "val.compute_ips", "metadata": {"unit": "img/s", "format": ":.2f", "type": "THROUGHPUT"}} -DLLL {"timestamp": "1570716925.636901", "datetime": "2019-10-10 14:15:25.636901", "type": "METADATA", "metric": "val.total_ips", "metadata": {"unit": "img/s", "format": ":.2f", "type": "THROUGHPUT"}} -DLLL {"timestamp": "1570716925.636916", "datetime": "2019-10-10 14:15:25.636916", "type": "METADATA", "metric": "val.data_time", "metadata": {"unit": "s", "format": ":.5f"}} -DLLL {"timestamp": "1570716925.636931", "datetime": "2019-10-10 14:15:25.636931", "type": "METADATA", "metric": "val.compute_latency", "metadata": {"unit": "s", "format": ":.5f"}} -DLLL {"timestamp": "1570716925.976676", "datetime": "2019-10-10 14:15:25.976676", "type": "LOG", "step": [0, 25, 5], "data": {"val.top1": 0.0, "val.top5": 0.0, "val.loss": 980.1, "val.total_ips": 1899.5387923262765}} -DLLL {"timestamp": "1570716925.976797", "datetime": "2019-10-10 14:15:25.976797", "type": "LOG", "step": [0, 25, 5], "data": {"val.compute_ips": 1992.1607639411063, "val.data_time": 0.003461885452270508, "val.compute_latency": 0.06426310539245605}} -DLLL {"timestamp": "1570716926.31643", "datetime": "2019-10-10 14:15:26.316430", "type": "LOG", "step": [0, 25, 10], "data": {"val.top1": 0.0, "val.top5": 0.0, "val.loss": 999.4, "val.total_ips": 1906.0188396215763}} -DLLL {"timestamp": "1570716926.316559", "datetime": "2019-10-10 14:15:26.316559", "type": "LOG", "step": [0, 25, 10], "data": {"val.compute_ips": 1946.053556840902, "val.data_time": 0.001429128646850586, "val.compute_latency": 0.06578779220581055}} -DLLL {"timestamp": "1570716926.716978", "datetime": "2019-10-10 14:15:26.716978", "type": "LOG", "step": [0, 25, 15], "data": {"val.top1": 0.0, "val.top5": 0.0, "val.loss": 878.6, "val.total_ips": 1718.1455378246978}} -DLLL {"timestamp": "1570716926.717101", "datetime": "2019-10-10 14:15:26.717101", "type": "LOG", "step": [0, 25, 15], "data": {"val.compute_ips": 1920.76530625035, "val.data_time": 0.01335606575012207, "val.compute_latency": 0.06664037704467773}} -DLLL {"timestamp": "1570716927.078896", "datetime": "2019-10-10 14:15:27.078896", "type": "LOG", "step": [0, 25, 20], "data": {"val.top1": 0.0, "val.top5": 0.0, "val.loss": 741.3, "val.total_ips": 1782.0731816645464}} -DLLL {"timestamp": "1570716927.079049", "datetime": "2019-10-10 14:15:27.079049", "type": "LOG", "step": [0, 25, 20], "data": {"val.compute_ips": 1961.7507761216355, "val.data_time": 0.0070168495178222655, "val.compute_latency": 0.065254545211792}} -DLLL {"timestamp": "1570716927.418798", "datetime": "2019-10-10 14:15:27.418798", "type": "LOG", "step": [0], "data": {"train.loss": 8.34140625, "train.total_ips": 648.9330897795348, "val.top1": 0.0, "val.top5": 0.0, "val.loss": 907.16, "val.total_ips": 1838.6215513621019}} -DLLL {"timestamp": "1570716927.418929", "datetime": "2019-10-10 14:15:27.418929", "type": "LOG", "step": [0], "data": {"lr": 0.1, "train.compute_ips": 650.8774403403595, "train.data_time": 0.0009337902069091797, "train.compute_time": 0.37831764221191405, "val.compute_ips": 1948.5837818061648, "val.data_time": 0.005306968688964844, "val.compute_latency": 0.06570755004882813}} -DLLL {"timestamp": "1570716928.478332", "datetime": "2019-10-10 14:15:28.478332", "type": "LOG", "step": [1, 5], "data": {"train.loss": 7.26328125, "train.total_ips": 675.02666986389}} -DLLL {"timestamp": "1570716928.478458", "datetime": "2019-10-10 14:15:28.478458", "type": "LOG", "step": [1, 5], "data": {"lr": 0.1, "train.compute_ips": 677.4323865601783, "train.data_time": 0.0006747722625732421, "train.compute_time": 0.18894901275634765}} -DLLL {"timestamp": "1570716929.427186", "datetime": "2019-10-10 14:15:29.427186", "type": "LOG", "step": [1, 10], "data": {"train.loss": 7.13203125, "train.total_ips": 674.6990366923962}} -DLLL {"timestamp": "1570716929.427309", "datetime": "2019-10-10 14:15:29.427309", "type": "LOG", "step": [1, 10], "data": {"lr": 0.1, "train.compute_ips": 676.6737206745277, "train.data_time": 0.0005538463592529297, "train.compute_time": 0.18916082382202148}} -DLLL {"timestamp": "1570716930.375551", "datetime": "2019-10-10 14:15:30.375551", "type": "LOG", "step": [1, 15], "data": {"train.loss": 7.0953125, "train.total_ips": 675.0561212936148}} -DLLL {"timestamp": "1570716930.375678", "datetime": "2019-10-10 14:15:30.375678", "type": "LOG", "step": [1, 15], "data": {"lr": 0.1, "train.compute_ips": 677.0189091520703, "train.data_time": 0.0005497455596923829, "train.compute_time": 0.18906450271606445}} -DLLL {"timestamp": "1570716931.323747", "datetime": "2019-10-10 14:15:31.323747", "type": "LOG", "step": [1, 20], "data": {"train.loss": 6.99453125, "train.total_ips": 675.1843044045183}} -DLLL {"timestamp": "1570716931.323873", "datetime": "2019-10-10 14:15:31.323873", "type": "LOG", "step": [1, 20], "data": {"lr": 0.1, "train.compute_ips": 677.1184791430926, "train.data_time": 0.0005415439605712891, "train.compute_time": 0.18903646469116211}} -DLLL {"timestamp": "1570716932.610946", "datetime": "2019-10-10 14:15:32.610946", "type": "LOG", "step": [1, 25, 5], "data": {"val.top1": 0.0, "val.top5": 0.0, "val.loss": 7.0375, "val.total_ips": 1907.7503340033672}} -DLLL {"timestamp": "1570716932.611064", "datetime": "2019-10-10 14:15:32.611064", "type": "LOG", "step": [1, 25, 5], "data": {"val.compute_ips": 1939.4209648795324, "val.data_time": 0.0011357784271240235, "val.compute_latency": 0.06602025032043457}} -DLLL {"timestamp": "1570716932.947566", "datetime": "2019-10-10 14:15:32.947566", "type": "LOG", "step": [1, 25, 10], "data": {"val.top1": 0.0, "val.top5": 0.0, "val.loss": 6.9109375, "val.total_ips": 1904.8082467433455}} -DLLL {"timestamp": "1570716932.947687", "datetime": "2019-10-10 14:15:32.947687", "type": "LOG", "step": [1, 25, 10], "data": {"val.compute_ips": 1920.5755315179053, "val.data_time": 0.0005514144897460938, "val.compute_latency": 0.06664786338806153}} -DLLL {"timestamp": "1570716933.284214", "datetime": "2019-10-10 14:15:33.284214", "type": "LOG", "step": [1, 25, 15], "data": {"val.top1": 0.0, "val.top5": 0.0, "val.loss": 7.16875, "val.total_ips": 1904.632943282044}} -DLLL {"timestamp": "1570716933.284334", "datetime": "2019-10-10 14:15:33.284334", "type": "LOG", "step": [1, 25, 15], "data": {"val.compute_ips": 1920.1927488861306, "val.data_time": 0.0005453109741210937, "val.compute_latency": 0.0666773796081543}} -DLLL {"timestamp": "1570716933.617256", "datetime": "2019-10-10 14:15:33.617256", "type": "LOG", "step": [1, 25, 20], "data": {"val.top1": 0.0, "val.top5": 1.40625, "val.loss": 6.9921875, "val.total_ips": 1925.4704413854456}} -DLLL {"timestamp": "1570716933.617377", "datetime": "2019-10-10 14:15:33.617377", "type": "LOG", "step": [1, 25, 20], "data": {"val.compute_ips": 1941.7822306655355, "val.data_time": 0.0005568027496337891, "val.compute_latency": 0.06593794822692871}} -DLLL {"timestamp": "1570716934.053365", "datetime": "2019-10-10 14:15:34.053365", "type": "LOG", "step": [1], "data": {"train.loss": 7.094375, "train.total_ips": 675.0272251225636, "val.top1": 0.0, "val.top5": 0.28125, "val.loss": 7.0034375, "val.total_ips": 1853.2851323015802}} -DLLL {"timestamp": "1570716934.053537", "datetime": "2019-10-10 14:15:34.053537", "type": "LOG", "step": [1], "data": {"lr": 0.1, "train.compute_ips": 677.075601164113, "train.data_time": 0.0005739879608154297, "train.compute_time": 0.18904858589172363, "val.compute_ips": 1935.621602561903, "val.data_time": 0.00468775749206543, "val.compute_latency": 0.06614609718322755}} -DLLL {"timestamp": "1570716934.289014", "datetime": "2019-10-10 14:15:34.289014", "type": "LOG", "step": [], "data": {"train.loss": 8.34140625, "train.total_ips": 661.9801574510492, "val.top1": 0.0, "val.top5": 0.28125, "val.loss": 907.16, "val.total_ips": 1845.9533418318408}} -DLLL {"timestamp": "1570716934.289168", "datetime": "2019-10-10 14:15:34.289168", "type": "LOG", "step": [], "data": {"lr": 0.1, "train.compute_ips": 663.9765207522363, "train.data_time": 0.0007538890838623047, "train.compute_time": 0.28368311405181884, "val.compute_ips": 1942.102692184034, "val.data_time": 0.0049973630905151365, "val.compute_latency": 0.06592682361602784}} diff --git a/TensorFlow/Segmentation/VNet/dllogger/examples/example_stdout_log.json b/TensorFlow/Segmentation/VNet/dllogger/examples/example_stdout_log.json deleted file mode 100644 index 122ea9b26..000000000 --- a/TensorFlow/Segmentation/VNet/dllogger/examples/example_stdout_log.json +++ /dev/null @@ -1,49 +0,0 @@ -DLLL 2019-10-10 16:42:54.500902 - PARAMETER HP1 : 17 -DLLL 2019-10-10 16:42:54.501000 - PARAMETER HP2 : 23 -DLLL 2019-10-10 16:42:54.501035 - PARAMETER HP3 : 1 HP4 : 2 -DLLL 2019-10-10 16:42:54.501122 - Epoch: 0 speed : 10.000 speeds/s -DLLL 2019-10-10 16:42:54.501155 - Epoch: 0 Iteration: 0 loss : 130.0 nat -DLLL 2019-10-10 16:42:54.501186 - Epoch: 0 Iteration: 0 Validation Iteration: 0 val.loss : 230.0 nat -DLLL 2019-10-10 16:42:54.501214 - Epoch: 0 Iteration: 0 Validation Iteration: 1 val.loss : 115.0 nat -DLLL 2019-10-10 16:42:54.501241 - Epoch: 0 Iteration: 0 Validation Iteration: 2 val.loss : 76.66666666666667 nat -DLLL 2019-10-10 16:42:54.501273 - Epoch: 0 Iteration: 1 loss : 65.0 nat -DLLL 2019-10-10 16:42:54.501301 - Epoch: 0 Iteration: 2 loss : 43.333333333333336 nat -DLLL 2019-10-10 16:42:54.501328 - Epoch: 0 Iteration: 3 loss : 32.5 nat -DLLL 2019-10-10 16:42:54.501355 - Epoch: 0 Iteration: 3 Validation Iteration: 0 val.loss : 57.5 nat -DLLL 2019-10-10 16:42:54.501382 - Epoch: 0 Iteration: 3 Validation Iteration: 1 val.loss : 46.0 nat -DLLL 2019-10-10 16:42:54.501408 - Epoch: 0 Iteration: 3 Validation Iteration: 2 val.loss : 38.333333333333336 nat -DLLL 2019-10-10 16:42:54.501439 - Epoch: 0 Iteration: 4 loss : 26.0 nat -DLLL 2019-10-10 16:42:54.501466 - Epoch: 0 Iteration: 5 loss : 21.666666666666668 nat -DLLL 2019-10-10 16:42:54.501493 - Epoch: 0 Iteration: 6 loss : 18.571428571428573 nat -DLLL 2019-10-10 16:42:54.501521 - Epoch: 0 Iteration: 6 Validation Iteration: 0 val.loss : 32.857142857142854 nat -DLLL 2019-10-10 16:42:54.501553 - Epoch: 0 Iteration: 6 Validation Iteration: 1 val.loss : 28.75 nat -DLLL 2019-10-10 16:42:54.501581 - Epoch: 0 Iteration: 6 Validation Iteration: 2 val.loss : 25.555555555555557 nat -DLLL 2019-10-10 16:42:54.501612 - Epoch: 0 Iteration: 7 loss : 16.25 nat -DLLL 2019-10-10 16:42:54.501640 - Epoch: 0 Iteration: 8 loss : 14.444444444444445 nat -DLLL 2019-10-10 16:42:54.501667 - Epoch: 0 Iteration: 9 loss : 13.0 nat -DLLL 2019-10-10 16:42:54.501693 - Epoch: 0 Iteration: 9 Validation Iteration: 0 val.loss : 23.0 nat -DLLL 2019-10-10 16:42:54.501719 - Epoch: 0 Iteration: 9 Validation Iteration: 1 val.loss : 20.90909090909091 nat -DLLL 2019-10-10 16:42:54.501750 - Epoch: 0 Iteration: 9 Validation Iteration: 2 val.loss : 19.166666666666668 nat -DLLL 2019-10-10 16:42:54.501778 - Epoch: 1 speed : 10.000 speeds/s -DLLL 2019-10-10 16:42:54.501805 - Epoch: 1 Iteration: 0 loss : 11.818181818181818 nat -DLLL 2019-10-10 16:42:54.501833 - Epoch: 1 Iteration: 0 Validation Iteration: 0 val.loss : 20.90909090909091 nat -DLLL 2019-10-10 16:42:54.501861 - Epoch: 1 Iteration: 0 Validation Iteration: 1 val.loss : 19.166666666666668 nat -DLLL 2019-10-10 16:42:54.501893 - Epoch: 1 Iteration: 0 Validation Iteration: 2 val.loss : 17.692307692307693 nat -DLLL 2019-10-10 16:42:54.501921 - Epoch: 1 Iteration: 1 loss : 10.833333333333334 nat -DLLL 2019-10-10 16:42:54.501948 - Epoch: 1 Iteration: 2 loss : 10.0 nat -DLLL 2019-10-10 16:42:54.501974 - Epoch: 1 Iteration: 3 loss : 9.285714285714286 nat -DLLL 2019-10-10 16:42:54.502001 - Epoch: 1 Iteration: 3 Validation Iteration: 0 val.loss : 16.428571428571427 nat -DLLL 2019-10-10 16:42:54.502029 - Epoch: 1 Iteration: 3 Validation Iteration: 1 val.loss : 15.333333333333334 nat -DLLL 2019-10-10 16:42:54.502056 - Epoch: 1 Iteration: 3 Validation Iteration: 2 val.loss : 14.375 nat -DLLL 2019-10-10 16:42:54.502084 - Epoch: 1 Iteration: 4 loss : 8.666666666666666 nat -DLLL 2019-10-10 16:42:54.502111 - Epoch: 1 Iteration: 5 loss : 8.125 nat -DLLL 2019-10-10 16:42:54.502138 - Epoch: 1 Iteration: 6 loss : 7.647058823529412 nat -DLLL 2019-10-10 16:42:54.502165 - Epoch: 1 Iteration: 6 Validation Iteration: 0 val.loss : 13.529411764705882 nat -DLLL 2019-10-10 16:42:54.502193 - Epoch: 1 Iteration: 6 Validation Iteration: 1 val.loss : 12.777777777777779 nat -DLLL 2019-10-10 16:42:54.502223 - Epoch: 1 Iteration: 6 Validation Iteration: 2 val.loss : 12.105263157894736 nat -DLLL 2019-10-10 16:42:54.502251 - Epoch: 1 Iteration: 7 loss : 7.222222222222222 nat -DLLL 2019-10-10 16:42:54.502278 - Epoch: 1 Iteration: 8 loss : 6.842105263157895 nat -DLLL 2019-10-10 16:42:54.502307 - Epoch: 1 Iteration: 9 loss : 6.5 nat -DLLL 2019-10-10 16:42:54.502333 - Epoch: 1 Iteration: 9 Validation Iteration: 0 val.loss : 11.5 nat -DLLL 2019-10-10 16:42:54.502360 - Epoch: 1 Iteration: 9 Validation Iteration: 1 val.loss : 10.952380952380953 nat -DLLL 2019-10-10 16:42:54.502388 - Epoch: 1 Iteration: 9 Validation Iteration: 2 val.loss : 10.454545454545455 nat diff --git a/TensorFlow/Segmentation/VNet/dllogger/setup.py b/TensorFlow/Segmentation/VNet/dllogger/setup.py deleted file mode 100644 index 6bd1e43c2..000000000 --- a/TensorFlow/Segmentation/VNet/dllogger/setup.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import setuptools -import pathlib - -README = (pathlib.Path(__file__).parent / "README.md").read_text() - -setuptools.setup( - name="DLLogger", - version="0.1.0", - author="NVIDIA Corporation", - description="NVIDIA DLLogger - logging for Deep Learning applications", - long_description=README, - long_description_content_type="text/markdown", - url="https://github.com/NVIDIA/dllogger", - packages=["dllogger"], - install_package_data=True, - license='Apache2', - license_file='./LICENSE', - classifiers=[ - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Operating System :: OS Independent", - ], - python_requires=">=3.5", -) diff --git a/TensorFlow/Segmentation/VNet/main.py b/TensorFlow/Segmentation/VNet/main.py index 9292eaa00..2ebe094f8 100644 --- a/TensorFlow/Segmentation/VNet/main.py +++ b/TensorFlow/Segmentation/VNet/main.py @@ -26,8 +26,8 @@ import horovod.tensorflow as hvd import tensorflow as tf -import dllogger.dllogger as DLLogger -from dllogger.dllogger import StdOutBackend, JSONStreamBackend, Verbosity +import dllogger as DLLogger +from dllogger import StdOutBackend, JSONStreamBackend, Verbosity from hooks.profiling_hook import ProfilingHook from hooks.train_hook import TrainHook from utils.cmd_util import PARSER