# cfg_pretrain.yaml
# Forked from SamsungSAILMontreal/TinyRecursiveModels
# ARC training config
#
# Top-level Hydra config for a pretraining run. Keys are grouped as:
# composition (defaults/hydra), data, evaluators, optimizer/schedule
# hyperparameters, and puzzle-embedding / misc switches.

# Hydra defaults list: pull in the `trm` option of the `arch` config group,
# then apply this file's own values (`_self_`) after it.
defaults:
  - arch: trm
  - _self_

# `output_subdir` must be nested under `hydra:` to be a Hydra setting;
# at column 0 it would be an unrelated top-level key and `hydra` would be null.
hydra:
  output_subdir: null

# Data path
data_paths: ['data/arc-aug-1000']
data_paths_test: []

evaluators:
  - name: arc@ARC

# Hyperparams - Training
global_batch_size: 768

epochs: 100000
eval_interval: 10000
checkpoint_every_eval: true

# Exponent-form floats are written with an explicit mantissa dot so that
# YAML 1.1 loaders (e.g. PyYAML) resolve them as floats, not strings.
lr: 1.0e-4
# NOTE(review): presumably the final-LR / peak-LR ratio of the schedule;
# confirm against the training script.
lr_min_ratio: 1.0
lr_warmup_steps: 2000

# Standard hyperparameter settings for LM, as used in Llama
beta1: 0.9
beta2: 0.95
weight_decay: 0.1
puzzle_emb_weight_decay: 0.1

# Hyperparams - Puzzle embeddings training
puzzle_emb_lr: 1.0e-2

seed: 0
min_eval_interval: 0  # when to start the eval

ema: false  # use Exponential-Moving-Average
ema_rate: 0.999  # EMA-rate
freeze_weights: false  # If true, freeze weights and only learn the embeddings