-
Notifications
You must be signed in to change notification settings - Fork 805
Expand file tree
/
Copy pathstartup_script.sh
More file actions
77 lines (61 loc) · 1.63 KB
/
startup_script.sh
File metadata and controls
77 lines (61 loc) · 1.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/bin/bash
# VM startup script: prepares the /train working area and installs two tiny
# helper scripts that let a user attach to the tmux session created further
# down in this file.
#
# Note that all __XYZ__ strings are replaced by launch_gce.py

# Directory that receives the training work files and the log files written
# by the `tee -a $WORKDIR/...` pipelines later in this script.
WORKDIR="/train/workdir_base/__EXAMPLE__/__NAME__/__TIMESTAMP__"

# Create $WORKDIR (and therefore /train) up front: `tee` opens its output file
# as soon as the pipeline starts, so the directory must already exist or the
# log output is silently dropped.
mkdir -p /train "$WORKDIR" || exit 1

# Everything below uses paths relative to /train (flax, workdir_base), so do
# not continue from some other directory if this fails.
cd /train || exit 1

# Login directly with:
# gcloud compute ssh $VM -- /sudo_tmux_a.sh
#
# /sudo_tmux_a.sh re-runs /tmux_a.sh through sudo; /tmux_a.sh attaches to the
# running tmux session. printf is used instead of `echo -e` because its
# backslash handling does not depend on shell options.
printf '#!/bin/bash\nsudo /tmux_a.sh\n' > /sudo_tmux_a.sh
chmod a+x /sudo_tmux_a.sh
printf '#!/bin/bash\ntmux a\n' > /tmux_a.sh
chmod a+x /tmux_a.sh
# Main script running in bottom left tmux pane.
#
# Writes /install_train_stop.sh. The here-document delimiter is deliberately
# unquoted: $WORKDIR and the $((...)) arithmetic are expanded right now, while
# this startup script runs, so the generated file contains literal values.
# Anything that must vary per launch therefore has to be either a variable
# assigned in this startup script or an __XYZ__ placeholder; the log file name
# uses the __TIMESTAMP__ placeholder for that reason (a shell variable named
# TIMESTAMP is not assigned anywhere in this script and would expand to
# nothing here).
#
# The generated script, in order:
#   1. Makes sure the log directory exists before `tee` opens the log file.
#   2. Installs flax from the configured repo/branch into a conda env named
#      "flax", unless that env can be activated AND ./flax already exists
#      (`a && b || c` groups left to right, so the install subshell runs
#      whenever either check fails).
#   3. Runs the example's main.py with the configured arguments, appending
#      stdout and stderr of the whole step to the setup/train log.
#   4. If the configured shutdown delay is greater than zero, sleeps that many
#      seconds and then shuts the machine down.
cat >/install_train_stop.sh <<EOF
set -x
mkdir -p "$WORKDIR"
(
  conda activate flax &&
  [ -d flax ] || (
    git clone --depth 1 -b __BRANCH__ __REPO__ &&
    cd flax &&
    conda create -yn flax python==3.9 &&
    conda activate flax &&
    pip install -U pip &&
    pip install -e . &&
    cd examples/__EXAMPLE__ &&
    pip install -r requirements.txt &&
    cd /train
  ) &&
  conda activate flax &&
  cd flax &&
  cd examples/__EXAMPLE__ &&
  TFDS_DATA_DIR='__TFDS_DATA_DIR__' python main.py --workdir="$WORKDIR" __ARGS__
) 2>&1 | tee -a "$WORKDIR"/setup_train_log_'__TIMESTAMP__'.txt
if [ __SHUTDOWN_SECS__ -gt 0 ]; then
  echo
  echo WILL SHUT DOWN IN $((__SHUTDOWN_SECS__/60)) MIN ...
  sleep __SHUTDOWN_SECS__ && shutdown now
fi
EOF
# Set up TMUX panes: a detached session named "flax" split into four panes.
# Each command is typed into the currently selected pane; the trailing
# newline in every string submits the command line.
tmux new-session -d -s flax

# - top left: htop
tmux send-keys $'htop\n'

# Split off a lower pane, go back up, then split the upper pane sideways.
tmux split-window
tmux select-pane -U
tmux split-window -h

# - top right: GPU status (watch nvidia-smi)
tmux send-keys $'watch nvidia-smi\n'

tmux select-pane -D

# - bottom left: main script (sourced, so it runs inside the pane's shell)
tmux send-keys $'. /install_train_stop.sh\n'

tmux split-window -h

# - bottom right: rsync files to GCS bucket.
# $WORKDIR is expanded here, before the text is sent; the loop copies
# workdir_base to the bucket once a minute and appends its output to a log.
gcs_sync_loop="
while true; do
gcloud storage rsync --recursive workdir_base __GCS_WORKDIR_BASE__
sleep 60
done 2>&1 | tee -a $WORKDIR/gcs_rsync_'__TIMESTAMP__'.txt
"
tmux send-keys "$gcs_sync_loop"