llama-fit: fix regex pattern for gate_up tensors (ggml-org#20910)

am17an · JohannesGaessler · web-flow · commit e852eb490136 · 2026-03-24T12:57:57.000+08:00
* llama-fit: fix regex pattern for gate_up tensors

* Apply suggestions from code review

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
diff --git a/src/llama.cpp b/src/llama.cpp
@@ -365,14 +365,14 @@ static void llama_params_fit_impl(
             case LAYER_FRACTION_ATTN: {
                 static std::array<std::string, n_strings> patterns;
                 if (patterns[il].empty()) {
-                    patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(up|gate|down).*";
+                    patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(gate|up|gate_up|down).*";
                 }
                 return patterns[il].c_str();
             }
             case LAYER_FRACTION_UP: {
                 static std::array<std::string, n_strings> patterns;
                 if (patterns[il].empty()) {
-                    patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(gate|down).*";
+                    patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(gate|gate_up|down).*";
                 }
                 return patterns[il].c_str();
             }
@@ -386,7 +386,7 @@ static void llama_params_fit_impl(
             case LAYER_FRACTION_MOE: {
                 static std::array<std::string, n_strings> patterns;
                 if (patterns[il].empty()) {
-                    patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(up|down|gate)_(ch|)exps";
+                    patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(up|down|gate_up|gate)_(ch|)exps";
                 }
                 return patterns[il].c_str();
             }
@@ -480,7 +480,7 @@ static void llama_params_fit_impl(
 
     int64_t global_surplus_cpu_moe = 0;
     if (hp_nex > 0) {
-        const static std::string pattern_moe_all = "blk\\.\\d+\\.ffn_(up|down|gate)_(ch|)exps"; // matches all MoE tensors
+        const static std::string pattern_moe_all = "blk\\.\\d+\\.ffn_(up|down|gate_up|gate)_(ch|)exps"; // matches all MoE tensors
         ggml_backend_buffer_type_t cpu_buft = ggml_backend_cpu_buffer_type();
         tensor_buft_overrides[0] = {pattern_moe_all.c_str(), cpu_buft};
         tensor_buft_overrides[1] = {nullptr, nullptr};

Original file line number	Diff line number	Diff line change
`@@ -365,14 +365,14 @@ static void llama_params_fit_impl(`
`365`	`365`	`case LAYER_FRACTION_ATTN: {`
`366`	`366`	`static std::array<std::string, n_strings> patterns;`
`367`	`367`	`if (patterns[il].empty()) {`
`368`		`- patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(up|gate|down).*";`
	`368`	`+ patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(gate|up|gate_up|down).*";`
`369`	`369`	`}`
`370`	`370`	`return patterns[il].c_str();`
`371`	`371`	`}`
`372`	`372`	`case LAYER_FRACTION_UP: {`
`373`	`373`	`static std::array<std::string, n_strings> patterns;`
`374`	`374`	`if (patterns[il].empty()) {`
`375`		`- patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(gate|down).*";`
	`375`	`+ patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(gate|gate_up|down).*";`
`376`	`376`	`}`
`377`	`377`	`return patterns[il].c_str();`
`378`	`378`	`}`
`@@ -386,7 +386,7 @@ static void llama_params_fit_impl(`
`386`	`386`	`case LAYER_FRACTION_MOE: {`
`387`	`387`	`static std::array<std::string, n_strings> patterns;`
`388`	`388`	`if (patterns[il].empty()) {`
`389`		`- patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(up|down|gate)_(ch|)exps";`
	`389`	`+ patterns[il] = "blk\\." + std::to_string(il) + "\\.ffn_(up|down|gate_up|gate)_(ch|)exps";`
`390`	`390`	`}`
`391`	`391`	`return patterns[il].c_str();`
`392`	`392`	`}`
`@@ -480,7 +480,7 @@ static void llama_params_fit_impl(`
`480`	`480`
`481`	`481`	`int64_t global_surplus_cpu_moe = 0;`
`482`	`482`	`if (hp_nex > 0) {`
`483`		`- const static std::string pattern_moe_all = "blk\\.\\d+\\.ffn_(up|down|gate)_(ch|)exps"; // matches all MoE tensors`
	`483`	`+ const static std::string pattern_moe_all = "blk\\.\\d+\\.ffn_(up|down|gate_up|gate)_(ch|)exps"; // matches all MoE tensors`
`484`	`484`	`ggml_backend_buffer_type_t cpu_buft = ggml_backend_cpu_buffer_type();`
`485`	`485`	`tensor_buft_overrides[0] = {pattern_moe_all.c_str(), cpu_buft};`
`486`	`486`	`tensor_buft_overrides[1] = {nullptr, nullptr};`