@@ -365,14 +365,14 @@ static void llama_params_fit_impl(
365365 case LAYER_FRACTION_ATTN: {
366366 static std::array<std::string, n_strings> patterns;
367367 if (patterns[il].empty ()) {
368- patterns[il] = " blk\\ ." + std::to_string (il) + " \\ .ffn_(up|gate |down).*" ;
368+ patterns[il] = " blk\\ ." + std::to_string (il) + " \\ .ffn_(gate| up|gate_up |down).*" ;
369369 }
370370 return patterns[il].c_str ();
371371 }
372372 case LAYER_FRACTION_UP: {
373373 static std::array<std::string, n_strings> patterns;
374374 if (patterns[il].empty ()) {
375- patterns[il] = " blk\\ ." + std::to_string (il) + " \\ .ffn_(gate|down).*" ;
375+ patterns[il] = " blk\\ ." + std::to_string (il) + " \\ .ffn_(gate|gate_up| down).*" ;
376376 }
377377 return patterns[il].c_str ();
378378 }
@@ -386,7 +386,7 @@ static void llama_params_fit_impl(
386386 case LAYER_FRACTION_MOE: {
387387 static std::array<std::string, n_strings> patterns;
388388 if (patterns[il].empty ()) {
389- patterns[il] = " blk\\ ." + std::to_string (il) + " \\ .ffn_(up|down|gate)_(ch|)exps" ;
389+ patterns[il] = " blk\\ ." + std::to_string (il) + " \\ .ffn_(up|down|gate_up| gate)_(ch|)exps" ;
390390 }
391391 return patterns[il].c_str ();
392392 }
@@ -480,7 +480,7 @@ static void llama_params_fit_impl(
480480
481481 int64_t global_surplus_cpu_moe = 0 ;
482482 if (hp_nex > 0 ) {
483- const static std::string pattern_moe_all = " blk\\ .\\ d+\\ .ffn_(up|down|gate)_(ch|)exps" ; // matches all MoE tensors
483+ const static std::string pattern_moe_all = " blk\\ .\\ d+\\ .ffn_(up|down|gate_up| gate)_(ch|)exps" ; // matches all MoE tensors
484484 ggml_backend_buffer_type_t cpu_buft = ggml_backend_cpu_buffer_type ();
485485 tensor_buft_overrides[0 ] = {pattern_moe_all.c_str (), cpu_buft};
486486 tensor_buft_overrides[1 ] = {nullptr , nullptr };
0 commit comments