forked from deepspeedai/DeepSpeedExamples
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
executable file
·93 lines (76 loc) · 4.21 KB
/
utils.py
File metadata and controls
executable file
·93 lines (76 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import sys
import argparse
def get_argument_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser for a training run.

    The parser is returned unparsed so that callers can register further
    options before calling ``parse_args``.

    Returns:
        An ``argparse.ArgumentParser`` with two required options
        (``--config-file`` and ``--output_dir``) and the optional options
        defined below.
    """
    parser = argparse.ArgumentParser()

    # ---- Required parameters ----
    parser.add_argument("--config-file", "--cf",
                        help="pointer to the configuration file of the experiment",
                        type=str,
                        required=True)
    parser.add_argument("--output_dir",
                        default=None,
                        type=str,
                        required=True,
                        help="The output directory where the model checkpoints will be written.")

    # ---- Optional parameters ----
    parser.add_argument("--max_seq_length",
                        default=512,
                        type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. Sequences "
                             "longer than this will be truncated, and sequences shorter than this will be padded.")
    parser.add_argument("--max_predictions_per_seq", "--max_pred",
                        default=80,
                        type=int,
                        help="The maximum number of masked tokens in a sequence to be predicted.")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")

    # `--do_lower_case` is a store_true flag whose default is already True, so
    # on its own it can never yield False. It is kept unchanged for existing
    # command lines; `--no_lower_case` writes False to the same destination so
    # that cased models can actually be selected.
    parser.add_argument("--do_lower_case",
                        default=True,
                        action='store_true',
                        help="Whether to lower case the input text. True for uncased models, False for cased models.")
    parser.add_argument("--no_lower_case",
                        dest="do_lower_case",
                        action='store_false',
                        help="Do not lower case the input text (sets do_lower_case to False; use for cased models).")

    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--use_pretrain',
                        default=False,
                        action='store_true',
                        help="Whether to use Bert Pretrain Weights or not")
    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal, so the help text does not pick up whatever
    # indentation the continuation line happens to have.
    parser.add_argument('--refresh_bucket_size',
                        type=int,
                        default=1,
                        help="This param makes sure that a certain task is repeated for this time steps to "
                             "optimise on the back propogation speed with APEX's DistributedDataParallel")
    parser.add_argument('--finetune',
                        default=False,
                        action='store_true',
                        help="Whether to finetune only")
    parser.add_argument('--load_training_checkpoint', '--load_cp',
                        type=str,
                        default=None,
                        help="This is the path to the TAR file which contains model+opt state_dict() checkpointed.")
    parser.add_argument('--load_checkpoint_id', '--load_cp_id',
                        type=str,
                        default=None,
                        help='Checkpoint identifier to load from checkpoint path')
    parser.add_argument('--job_name',
                        type=str,
                        default=None,
                        help="This is the path to store the output and TensorBoard results.")
    parser.add_argument('--rewarmup',
                        default=False,
                        action='store_true',
                        help='Rewarmup learning rate after resuming from a checkpoint')

    # Both step limits default to sys.maxsize, i.e. effectively "no limit".
    parser.add_argument('--max_steps',
                        type=int,
                        default=sys.maxsize,
                        help='Maximum number of training steps of effective batch size to complete.')
    parser.add_argument('--max_steps_per_epoch',
                        type=int,
                        default=sys.maxsize,
                        help='Maximum number of training steps of effective batch size within an epoch to complete.')
    parser.add_argument('--print_steps',
                        type=int,
                        default=100,
                        help='Interval to print training details.')
    parser.add_argument('--data_path_prefix',
                        type=str,
                        default="",
                        help="Path to prefix data loading, helpful for AML and other environments")

    return parser
def is_time_to_exit(args, epoch_steps=0, global_steps=0):
    """Report whether either configured step limit has been reached.

    Args:
        args: Object exposing ``max_steps_per_epoch`` and ``max_steps``
            attributes.
        epoch_steps: Step count compared against ``args.max_steps_per_epoch``.
        global_steps: Step count compared against ``args.max_steps``.

    Returns:
        The result of ``epoch_steps >= args.max_steps_per_epoch`` when that
        is truthy, otherwise the result of ``global_steps >= args.max_steps``.
    """
    # Check the per-epoch limit first; the global limit is only read when the
    # per-epoch limit has not been hit.
    epoch_limit_reached = epoch_steps >= args.max_steps_per_epoch
    if epoch_limit_reached:
        return epoch_limit_reached
    return global_steps >= args.max_steps