-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtutorial2_msi_network.sh
More file actions
265 lines (229 loc) · 10.1 KB
/
tutorial2_msi_network.sh
File metadata and controls
265 lines (229 loc) · 10.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
#!/bin/bash
__doc__="
This demonstrates an end-to-end pipeline on multispectral toydata
This walks through the entire process of fit -> predict -> evaluate and the
output if you run this should end with something like
source ~/code/geowatch/docs/source/manual/tutorial/tutorial2_msi_network.sh
"
# Define wherever you want to store results and create the directories
# if they don't already exist.
DVC_DATA_DPATH=$HOME/data/dvc-repos/toy_data_dvc
DVC_EXPT_DPATH=$HOME/data/dvc-repos/toy_expt_dvc
# Create the above directories of they don't exist
mkdir -p "$DVC_DATA_DPATH"
mkdir -p "$DVC_EXPT_DPATH"
# The user can overwrite these variables, but will default if they are not given.
NUM_TOY_TRAIN_VIDS="${NUM_TOY_TRAIN_VIDS:=100}" # If variable not set or null, use default.
NUM_TOY_VALI_VIDS="${NUM_TOY_VALI_VIDS:=5}" # If variable not set or null, use default.
NUM_TOY_TEST_VIDS="${NUM_TOY_TEST_VIDS:=2}" # If variable not set or null, use default.
# Generate toy datasets
TRAIN_FPATH=$DVC_DATA_DPATH/vidshapes_msi_train${NUM_TOY_TRAIN_VIDS}/data.kwcoco.json
VALI_FPATH=$DVC_DATA_DPATH/vidshapes_msi_vali${NUM_TOY_VALI_VIDS}/data.kwcoco.json
TEST_FPATH=$DVC_DATA_DPATH/vidshapes_msi_test${NUM_TOY_TEST_VIDS}/data.kwcoco.json
generate_data(){
kwcoco toydata --key="vidshapes${NUM_TOY_TRAIN_VIDS}-frames5-randgsize-speed0.2-msi-multisensor" \
--bundle_dpath "$DVC_DATA_DPATH/vidshapes_msi_train${NUM_TOY_TRAIN_VIDS}" --verbose=1
kwcoco toydata --key="vidshapes${NUM_TOY_VALI_VIDS}-frames5-randgsize-speed0.2-msi-multisensor" \
--bundle_dpath "$DVC_DATA_DPATH/vidshapes_msi_vali${NUM_TOY_VALI_VIDS}" --verbose=1
kwcoco toydata --key="vidshapes${NUM_TOY_TEST_VIDS}-frames6-randgsize-speed0.2-msi-multisensor" \
--bundle_dpath "$DVC_DATA_DPATH/vidshapes_msi_test${NUM_TOY_TEST_VIDS}" --verbose=1
}
print_stats(){
# Print stats
kwcoco stats "$TRAIN_FPATH" "$VALI_FPATH" "$TEST_FPATH"
geowatch stats "$TRAIN_FPATH" "$VALI_FPATH" "$TEST_FPATH"
}
# If the train file path does not exist then
if [[ ! -e "$TRAIN_FPATH" ]]; then
generate_data
print_stats
fi
__doc__="
Should look like
dset n_anns n_imgs n_videos n_cats r|g|b|disparity|gauss|B8|B11 B1|B8|B8a|B10|B11 r|g|b|flowx|flowy|distri|B10|B11
0 vidshapes_msi_train/data.kwcoco.json 80 40 8 3 12 12 16
1 vidshapes_msi_vali/data.kwcoco.json 50 25 5 3 9 10 6
2 vidshapes_msi_test/data.kwcoco.json 24 12 2 3 5 3 4
"
demo_visualize_toydata(){
python -m geowatch.cli.coco_visualize_videos \
--src "$TRAIN_FPATH" \
--channels="gauss|B11,r|g|b,B1|B8|B11" \
--viz_dpath="$DVC_DATA_DPATH/vidshapes_msi_train_viz" --animate=False
}
# Run the function if you want to visualize the data
# demo_visualize_toydata
# Define the channels we want to use
# The sensors and channels are specified by the kwcoco SensorChanSpec
# in this example the data does not contain sensor metadata, so we
# use a "*" to indicate a generic sensor.
# A colon ":" separates channels from the sensors.
# A pipe "|" indicates channels are early fused
# A "," indicates groups of early fused channels are late fused Multiple
# sensors can be specified to the left of the channels and will distribute over
# commas.
# Note: the channel names dont really mean anything. The demo data generated
# for them is random / arbitrary. They simply simulate a case where you do have
# a meaningful multiple-channel dataset you want to learn with.
CHANNELS="(*):(disparity|gauss,X.2|Y:2:6,B1|B8a,flowx|flowy|distri)"
echo "CHANNELS = $CHANNELS"
echo "
In Tutorial #2 we expand the complexity of the 'fit' command compared to the
previous tutorial. We define some of the important parameters as environment
variables to illustrate relationships between them.
"
# Fit
DATASET_CODE=ToyDataMSI
WORKDIR=$DVC_EXPT_DPATH/training/$HOSTNAME/$USER
EXPERIMENT_NAME=ToyDataMSI_Demo_V001
DEFAULT_ROOT_DIR=$WORKDIR/$DATASET_CODE/runs/$EXPERIMENT_NAME
TARGET_LR=3e-4
WEIGHT_DECAY=$(python -c "print($TARGET_LR * 0.01)")
# Try to set the accelerator to "GPU", but fallback to cpu if nvidia-smi is not found
if which nvidia-smi ; then
echo "Detected an NVIDIA GPU"
export ACCELERATOR=gpu
# Set CUDA_VISIBLE_DEVICES to a comma separated list of the GPU #'s you want to
# use (index may start at 0).
export CUDA_VISIBLE_DEVICES=0
else
echo "No GPU detected, falling back to CPU"
export ACCELERATOR=cpu
export CUDA_VISIBLE_DEVICES=
fi
# However, if you have only have a cpu, the tutorial will still run.
# Comment out the above two commands and enable the command below.
# export ACCELERATOR=cpu
DEVICES=$(python -c "if 1:
import os
if os.environ.get('ACCELERATOR', '') == 'cpu':
print('1')
else:
n = len(os.environ.get('CUDA_VISIBLE_DEVICES', '').split(','))
print(','.join(list(map(str, range(n)))) + ',')
")
STRATEGY=$(python -c "if 1:
import os
if os.environ.get('ACCELERATOR', '') == 'cpu':
print('auto')
else:
n = len(os.environ.get('CUDA_VISIBLE_DEVICES', '').split(','))
print('ddp' if n > 1 else 'auto')
")
DDP_WORKAROUND=$(python -c "if 1:
import os
n = len(os.environ.get('CUDA_VISIBLE_DEVICES', '').split(','))
print(int(n > 1))
")
echo "
CUDA_VISIBLE_DEVICES = $CUDA_VISIBLE_DEVICES
ACCELERATOR = $ACCELERATOR
DEVICES = $DEVICES
STRATEGY = $STRATEGY
DDP_WORKAROUND = $DDP_WORKAROUND
"
# The following parameters are related and impose constraints on each other.
MAX_STEPS=400
MAX_EPOCHS=20
TRAIN_BATCHES_PER_EPOCH=20
ACCUMULATE_GRAD_BATCHES=1
BATCH_SIZE=2
TRAIN_ITEMS_PER_EPOCH=$(python -c "print($TRAIN_BATCHES_PER_EPOCH * $BATCH_SIZE)")
# Thus we expose this recommendation script to suggest changes to that satisfy
# the constraints. (Its ok if it isnt perfect)
python -m geowatch.cli.experimental.recommend_size_adjustments \
--MAX_STEPS=$MAX_STEPS \
--MAX_EPOCHS=$MAX_EPOCHS \
--BATCH_SIZE=$BATCH_SIZE \
--ACCUMULATE_GRAD_BATCHES=$ACCUMULATE_GRAD_BATCHES \
--TRAIN_BATCHES_PER_EPOCH="$TRAIN_BATCHES_PER_EPOCH" \
--TRAIN_ITEMS_PER_EPOCH="$TRAIN_ITEMS_PER_EPOCH"
# Now that we have important variables defined in the environment we can run
# the actual fit command.
DDP_WORKAROUND=$DDP_WORKAROUND python -m geowatch.tasks.fusion fit --config "
data:
num_workers : 4
train_dataset : $TRAIN_FPATH
vali_dataset : $VALI_FPATH
channels : '$CHANNELS'
time_steps : 5
chip_dims : 128
batch_size : $BATCH_SIZE
model:
class_path: MultimodalTransformer
init_args:
name : $EXPERIMENT_NAME
arch_name : smt_it_stm_p8
window_size : 8
dropout : 0.1
global_saliency_weight: 1.0
global_class_weight: 1.0
global_change_weight: 0.0
optimizer:
class_path: torch.optim.Adam
init_args:
lr: $TARGET_LR
weight_decay: $WEIGHT_DECAY
betas:
- 0.9
- 0.99
trainer:
accumulate_grad_batches: $ACCUMULATE_GRAD_BATCHES
default_root_dir : $DEFAULT_ROOT_DIR
accelerator : $ACCELERATOR
devices : $DEVICES
strategy : $STRATEGY
limit_train_batches : $TRAIN_BATCHES_PER_EPOCH
check_val_every_n_epoch: 1
enable_checkpointing: true
enable_model_summary: true
log_every_n_steps: 5
logger: true
max_steps: $MAX_STEPS
num_sanity_val_steps: 0
initializer:
init: noop
"
# This is the default final package that is done at the end of training.
# See comments inside of demo_force_repackage
PACKAGE_FPATH="$DEFAULT_ROOT_DIR"/final_package.pt
demo_force_repackage(){
# TODO: the new watch.mlops tool will need to handle this soon.
# NOTE: The above fit script might not produce the "best" checkpoint as the
# final output package. To evaluate a different checkpoint it must first be
# packaged. (note this can be done while training is running so
# intermediate checkpoints can be evaluated while the model is still
# learning). The following is logic for how to "package" a single checkpoint.
#
# Look at all checkpoints
ls "$DEFAULT_ROOT_DIR"/*/*/checkpoints/*.ckpt
# Grab the latest checkpoint (this is an arbitrary choice)
CHECKPOINT_FPATH=$(find "$DEFAULT_ROOT_DIR" -iname "*.ckpt" | tail -n 1)
echo "CHECKPOINT_FPATH = $CHECKPOINT_FPATH"
# Repackage a particular checkpoint as a torch.package .pt file.
python -m geowatch.tasks.fusion.repackage repackage "$CHECKPOINT_FPATH"
# Redefine package fpath variable to be that checkpoint
PACKAGE_FPATH=$(python -m geowatch.tasks.fusion.repackage repackage "$CHECKPOINT_FPATH" | tail -n 1)
echo "PACKAGE_FPATH = $PACKAGE_FPATH"
}
# Define where we will output predictions / evaluations
# Later we will see how mlops.schedule_evaluation will do this for you, but in
# this tutorial we are going to run prediction and evaluation manually.
PRED_FPATH=$DEFAULT_ROOT_DIR/predictions/pred.kwcoco.json
EVAL_DPATH=$DEFAULT_ROOT_DIR/evaluation
# Predict using one of the packaged models
python -m geowatch.tasks.fusion.predict \
--package_fpath="$PACKAGE_FPATH" \
--test_dataset="$TEST_FPATH" \
--pred_dataset="$PRED_FPATH" \
# Dump stats of truth vs prediction.
# We should see soft segmentation masks in pred, but not in truth
kwcoco stats "$TEST_FPATH" "$PRED_FPATH"
geowatch stats "$TEST_FPATH" "$PRED_FPATH"
# Visualize pixel predictions with a raw band, predicted saliency, and predicted class.
geowatch visualize "$PRED_FPATH" --channels='B11,salient,star|superstar|eff' --smart=True
# Evaluate the predictions
python -m geowatch.tasks.fusion.evaluate \
--true_dataset="$TEST_FPATH" \
--pred_dataset="$PRED_FPATH" \
--eval_dpath="$EVAL_DPATH"