File tree Expand file tree Collapse file tree
TensorFlow2/Segmentation/UNet_Medical Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -526,7 +526,7 @@ This command will launch a script which will run 5-fold cross-validation trainin
526526 **Learning curves**
527527
528528 The following image shows the training loss as a function of iteration when training on DGX A100 (TF32 and TF-AMP) and DGX-1 V100 (FP32 and TF-AMP).
529- ![LearningCurves](images/UNetMed_TF2_conv.png)
529+ ![LearningCurves](images/U-NetMed_TF2_conv.png)
530530
531531
532532#### Training performance results
Original file line number Diff line number Diff line change @@ -21,4 +21,4 @@ horovodrun -np 1 python main.py --data_dir $1 --model_dir $2 --log_every 100 --m
2121horovodrun -np 1 python main.py --data_dir $1 --model_dir $2 --log_every 100 --max_steps 6400 --batch_size $3 --exec_mode train_and_evaluate --fold 2 --augment --xla > $2 /log_FP32_1GPU_fold2.txt
2222horovodrun -np 1 python main.py --data_dir $1 --model_dir $2 --log_every 100 --max_steps 6400 --batch_size $3 --exec_mode train_and_evaluate --fold 3 --augment --xla > $2 /log_FP32_1GPU_fold3.txt
2323horovodrun -np 1 python main.py --data_dir $1 --model_dir $2 --log_every 100 --max_steps 6400 --batch_size $3 --exec_mode train_and_evaluate --fold 4 --augment --xla > $2 /log_FP32_1GPU_fold4.txt
24- python utils /parse_results.py --model_dir $2 --exec_mode convergence --env FP32_1GPU
24+ python runtime /parse_results.py --model_dir $2 --exec_mode convergence --env FP32_1GPU
Original file line number Diff line number Diff line change @@ -21,4 +21,4 @@ horovodrun -np 8 python main.py --data_dir $1 --model_dir $2 --log_every 100 --m
2121horovodrun -np 8 python main.py --data_dir $1 --model_dir $2 --log_every 100 --max_steps 6400 --batch_size $3 --exec_mode train_and_evaluate --fold 2 --augment --xla > $2 /log_FP32_8GPU_fold2.txt
2222horovodrun -np 8 python main.py --data_dir $1 --model_dir $2 --log_every 100 --max_steps 6400 --batch_size $3 --exec_mode train_and_evaluate --fold 3 --augment --xla > $2 /log_FP32_8GPU_fold3.txt
2323horovodrun -np 8 python main.py --data_dir $1 --model_dir $2 --log_every 100 --max_steps 6400 --batch_size $3 --exec_mode train_and_evaluate --fold 4 --augment --xla > $2 /log_FP32_8GPU_fold4.txt
24- python utils /parse_results.py --model_dir $2 --exec_mode convergence --env FP32_8GPU
24+ python runtime /parse_results.py --model_dir $2 --exec_mode convergence --env FP32_8GPU
Original file line number Diff line number Diff line change @@ -21,4 +21,4 @@ horovodrun -np 1 python main.py --data_dir $1 --model_dir $2 --log_every 100 --m
2121horovodrun -np 1 python main.py --data_dir $1 --model_dir $2 --log_every 100 --max_steps 6400 --batch_size $3 --exec_mode train_and_evaluate --fold 2 --augment --xla --amp > $2 /log_TF-AMP_1GPU_fold2.txt
2222horovodrun -np 1 python main.py --data_dir $1 --model_dir $2 --log_every 100 --max_steps 6400 --batch_size $3 --exec_mode train_and_evaluate --fold 3 --augment --xla --amp > $2 /log_TF-AMP_1GPU_fold3.txt
2323horovodrun -np 1 python main.py --data_dir $1 --model_dir $2 --log_every 100 --max_steps 6400 --batch_size $3 --exec_mode train_and_evaluate --fold 4 --augment --xla --amp > $2 /log_TF-AMP_1GPU_fold4.txt
24- python utils /parse_results.py --model_dir $2 --exec_mode convergence --env TF-AMP_1GPU
24+ python runtime /parse_results.py --model_dir $2 --exec_mode convergence --env TF-AMP_1GPU
Original file line number Diff line number Diff line change @@ -21,4 +21,4 @@ horovodrun -np 8 python main.py --data_dir $1 --model_dir $2 --log_every 100 --m
2121horovodrun -np 8 python main.py --data_dir $1 --model_dir $2 --log_every 100 --max_steps 6400 --batch_size $3 --exec_mode train_and_evaluate --fold 2 --augment --xla --amp > $2 /log_TF-AMP_8GPU_fold2.txt
2222horovodrun -np 8 python main.py --data_dir $1 --model_dir $2 --log_every 100 --max_steps 6400 --batch_size $3 --exec_mode train_and_evaluate --fold 3 --augment --xla --amp > $2 /log_TF-AMP_8GPU_fold3.txt
2323horovodrun -np 8 python main.py --data_dir $1 --model_dir $2 --log_every 100 --max_steps 6400 --batch_size $3 --exec_mode train_and_evaluate --fold 4 --augment --xla --amp > $2 /log_TF-AMP_8GPU_fold4.txt
24- python utils /parse_results.py --model_dir $2 --exec_mode convergence --env TF-AMP_8GPU
24+ python runtime /parse_results.py --model_dir $2 --exec_mode convergence --env TF-AMP_8GPU
You can’t perform that action at this time.
0 commit comments