|
4 | 4 | # DeepSpeed Team |
5 | 5 | """ |
6 | 6 | Run all steps with default settings: |
7 | | -$ python3 train.py |
| 7 | +$ python3 e2e_rlhf.py |
8 | 8 |
|
9 | 9 | Change the model used for each step: |
10 | | -$ python3 train.py --actor-model 350m --reward-model 1.3b |
| 10 | +$ python3 e2e_rlhf.py --actor-model 350m --reward-model 1.3b |
11 | 11 |
|
12 | 12 | Change the ZeRO stage used for actor/reward models: |
13 | | -$ python3 train.py --actor-zero-stage 1 --reward-zero-stage 3 |
| 13 | +$ python3 e2e_rlhf.py --actor-zero-stage 1 --reward-zero-stage 3 |
14 | 14 |
|
15 | 15 | Run a subset of the steps: |
16 | | -$ python3 train.py --step 1 2 |
| 16 | +$ python3 e2e_rlhf.py --step 1 2 |
17 | 17 |
|
18 | 18 | Note: Step 3 relies on models trained in Steps 1 & 2. If you have already |
19 | 19 | trained these models, you can run just Step 3 and select which models from |
20 | 20 | Steps 1 & 2 to use. For example, let's train models for Steps 1 & 2 using |
21 | 21 | 125m and 350m models: |
22 | | -$ python3 train.py --step 1 2 --actor-model 125m --reward-model 125m |
23 | | -$ python3 train.py --step 1 2 --actor-model 350m --reward-model 350m |
| 22 | +$ python3 e2e_rlhf.py --step 1 2 --actor-model 125m --reward-model 125m |
| 23 | +$ python3 e2e_rlhf.py --step 1 2 --actor-model 350m --reward-model 350m |
24 | 24 |
|
25 | 25 | Now we can run Step 3 with any combination of these models: |
26 | | -$ python3 train.py --step 3 --actor-model 125m --reward-model 350m |
27 | | -$ python3 train.py --step 3 --actor-model 350m --reward-model 125m |
| 26 | +$ python3 e2e_rlhf.py --step 3 --actor-model 125m --reward-model 350m |
| 27 | +$ python3 e2e_rlhf.py --step 3 --actor-model 350m --reward-model 125m |
28 | 28 | """ |
29 | 29 |
|
30 | 30 | import argparse |
|
33 | 33 | import os |
34 | 34 | import datetime |
35 | 35 | import time |
| 36 | +import sys |
36 | 37 |
|
37 | 38 | step_dirs = { |
38 | 39 | 1: "training/step1_supervised_finetuning", |
@@ -144,7 +145,7 @@ def verify_model(args, step_num): |
144 | 145 | model_file = os.path.join(output_dir, "pytorch_model.bin") |
145 | 146 | if not os.path.isfile(model_file): |
146 | 147 | error_str = f"Step {step_num} model has not been trained. Train it with:\n" |
147 | | - error_str += f"python3 train.py --step {step_num}" |
| 148 | + error_str += f"{sys.executable.split('/')[-1]} {sys.argv[0]} --step {step_num}" |
148 | 149 | error_str += f" --{model_type[step_num]}-model {model_size}" |
149 | 150 | raise RuntimeError(error_str) |
150 | 151 |
|
@@ -194,7 +195,7 @@ def main(args): |
194 | 195 | cmd = get_cmd(args, step_num) |
195 | 196 | launch_cmd(args, step_num, cmd) |
196 | 197 |
|
197 | | - step_time = int(time.time() - start_time) |
| 198 | + step_time = int(time.time() - step_start_time) |
198 | 199 | time_str = str(datetime.timedelta(seconds=step_time)) |
199 | 200 | print(f"---=== Finished Step {step_num} in {time_str} ===---") |
200 | 201 |
|
|
0 commit comments