***************************************************** llama-cli log (working, no problem) ***************************************************** !echo "Hello!\nJust wanted to say hello.\n/exit\n" | ./llama.cpp/build/bin/llama-cli -m "Nemotron-Nano-3-30B-A3B-Q4_K_M.gguf" ggml_cuda_init: found 1 CUDA devices: Device 0: NVIDIA GeForce RTX 5090, compute capability 12.0, VMM: yes Loading model... ▄▄ ▄▄ ██ ██ ██ ██ ▀▀█▄ ███▄███▄ ▀▀█▄ ▄████ ████▄ ████▄ ██ ██ ▄█▀██ ██ ██ ██ ▄█▀██ ██ ██ ██ ██ ██ ██ ██ ▀█▄██ ██ ██ ██ ▀█▄██ ██ ▀████ ████▀ ████▀ ██ ██ ▀▀ ▀▀ build : b7640-e443fbcfa model : Nemotron-Nano-3-30B-A3B-Q4_K_M.gguf modalities : text available commands: /exit or Ctrl+C stop or exit /regen regenerate the last response /clear clear the chat history /read add a text file > Hello! The user says "Hello!". We should respond politely. No special instructions. Just greet back. Hello! How can I help you today? [ Prompt: 106.7 t/s | Generation: 219.4 t/s ] > Just wanted to say hello. The user says "Just wanted to say hello." We can respond politely, maybe ask how they're doing. No constraints. Just respond friendly. Hello! Nice to meet you. How’s your day going? [ Prompt: 24.0 t/s | Generation: 268.8 t/s ] > /exit Exiting... llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted | llama_memory_breakdown_print: | - CUDA0 (RTX 5090) | 32100 = 1050 + (29932 = 23140 + 4531 + 2260) + 1118 | llama_memory_breakdown_print: | - Host | 1730 = 231 + 0 + 1499 | ***************************************************** from llama_cpp import Llama llm = Llama( #https://huggingface.co/ggml-org/Nemotron-Nano-3-30B-A3B-GGUF model_path="Nemotron-Nano-3-30B-A3B-Q4_K_M.gguf", n_gpu_layers=100, n_ctx=48*1024, verbose=True ) ***************************************************** ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 CUDA devices: Device 0: NVIDIA GeForce RTX 5090, compute capability 12.0, VMM: yes llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 5090) (0000:08:00.0) - 31072 MiB free llama_model_loader: loaded meta data with 48 key-value pairs and 401 tensors from ../../DATA/OPT.Models/GGUF/Nemotron-Nano-3-30B-A3B-Q4_K_M.gguf (version GGUF V3 (latest)) llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output. llama_model_loader: - kv 0: general.architecture str = nemotron_h_moe llama_model_loader: - kv 1: general.type str = model llama_model_loader: - kv 2: general.sampling.top_p f32 = 1.000000 llama_model_loader: - kv 3: general.sampling.temp f32 = 1.000000 llama_model_loader: - kv 4: general.name str = Nano v3 Llm_Vv1.0.1 Ga llama_model_loader: - kv 5: general.size_label str = 128x2.4B llama_model_loader: - kv 6: nemotron_h_moe.block_count u32 = 52 llama_model_loader: - kv 7: nemotron_h_moe.context_length u32 = 1048576 llama_model_loader: - kv 8: nemotron_h_moe.embedding_length u32 = 2688 llama_model_loader: - kv 9: nemotron_h_moe.feed_forward_length arr[i32,52] = [0, 1856, 0, 1856, 0, 0, 1856, 0, 185... llama_model_loader: - kv 10: nemotron_h_moe.attention.head_count u32 = 32 llama_model_loader: - kv 11: nemotron_h_moe.attention.head_count_kv arr[i32,52] = [0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, ... llama_model_loader: - kv 12: nemotron_h_moe.rope.freq_base f32 = 10000.000000 llama_model_loader: - kv 13: nemotron_h_moe.attention.layer_norm_rms_epsilon f32 = 0.000010 llama_model_loader: - kv 14: nemotron_h_moe.attention.layer_norm_epsilon f32 = 0.000010 llama_model_loader: - kv 15: nemotron_h_moe.expert_used_count u32 = 6 llama_model_loader: - kv 16: nemotron_h_moe.expert_group_count u32 = 1 llama_model_loader: - kv 17: nemotron_h_moe.expert_group_used_count u32 = 1 llama_model_loader: - kv 18: nemotron_h_moe.vocab_size u32 = 131072 llama_model_loader: - kv 19: nemotron_h_moe.rope.dimension_count u32 = 84 llama_model_loader: - kv 20: nemotron_h_moe.ssm.conv_kernel u32 = 4 llama_model_loader: - kv 21: nemotron_h_moe.ssm.state_size u32 = 128 llama_model_loader: - kv 22: nemotron_h_moe.ssm.group_count u32 = 8 llama_model_loader: - kv 23: nemotron_h_moe.ssm.inner_size u32 = 4096 llama_model_loader: - kv 24: nemotron_h_moe.ssm.time_step_rank u32 = 64 llama_model_loader: - kv 25: nemotron_h_moe.rope.scaling.finetuned bool = false llama_model_loader: - kv 26: nemotron_h_moe.attention.key_length u32 = 128 llama_model_loader: - kv 27: nemotron_h_moe.attention.value_length u32 = 128 llama_model_loader: - kv 28: nemotron_h_moe.expert_feed_forward_length u32 = 1856 llama_model_loader: - kv 29: nemotron_h_moe.expert_shared_feed_forward_length u32 = 3712 llama_model_loader: - kv 30: nemotron_h_moe.expert_count u32 = 128 llama_model_loader: - kv 31: nemotron_h_moe.expert_shared_count u32 = 1 llama_model_loader: - kv 32: nemotron_h_moe.expert_weights_norm bool = true llama_model_loader: - kv 33: nemotron_h_moe.expert_weights_scale f32 = 2.500000 llama_model_loader: - kv 34: tokenizer.ggml.model str = gpt2 llama_model_loader: - kv 35: tokenizer.ggml.pre str = pixtral llama_model_loader: - kv 36: tokenizer.ggml.tokens arr[str,131072] = ["", "", "", "[INST]", "[... llama_model_loader: - kv 37: tokenizer.ggml.token_type arr[i32,131072] = [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ... Exception ignored on calling ctypes callback function: Traceback (most recent call last): File "/usr/local/lib/python3.11/dist-packages/llama_cpp/_logger.py", line 39, in llama_log_callback print(text.decode("utf-8"), end="", flush=True, file=sys.stderr) ^^^^^^^^^^^^^^^^^^^^ UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc4 in position 128: invalid continuation byte llama_model_loader: - kv 39: tokenizer.ggml.bos_token_id u32 = 1 llama_model_loader: - kv 40: tokenizer.ggml.eos_token_id u32 = 11 llama_model_loader: - kv 41: tokenizer.ggml.unknown_token_id u32 = 0 llama_model_loader: - kv 42: tokenizer.ggml.padding_token_id u32 = 0 llama_model_loader: - kv 43: tokenizer.ggml.add_bos_token bool = false llama_model_loader: - kv 44: tokenizer.ggml.add_eos_token bool = false llama_model_loader: - kv 45: tokenizer.chat_template str = {% macro render_extra_keys(json_dict,... llama_model_loader: - kv 46: general.quantization_version u32 = 2 llama_model_loader: - kv 47: general.file_type u32 = 15 llama_model_loader: - type f32: 237 tensors llama_model_loader: - type q5_0: 110 tensors llama_model_loader: - type q8_0: 25 tensors llama_model_loader: - type q4_K: 29 tensors print_info: file format = GGUF V3 (latest) print_info: file type = Q4_K - Medium print_info: file size = 22.82 GiB (6.21 BPW) init_tokenizer: initializing tokenizer for type 2 load: control token: 216 '' is not marked as EOG load: control token: 408 '' is not marked as EOG load: control token: 845 '' is not marked as EOG load: control token: 555 '' is not marked as EOG load: control token: 257 '' is not marked as EOG load: control token: 474 '' is not marked as EOG load: control token: 241 '' is not marked as EOG load: control token: 646 '' is not marked as EOG load: control token: 601 '' is not marked as EOG load: control token: 905 '' is not marked as EOG load: control token: 168 '' is not marked as EOG load: control token: 37 '' is not marked as EOG load: control token: 125 '' is not marked as EOG load: control token: 95 '' is not marked as EOG load: control token: 304 '' is not marked as EOG load: control token: 706 '' is not marked as EOG load: control token: 423 '' is not marked as EOG load: control token: 6 '[/AVAILABLE_TOOLS]' is not marked as EOG load: control token: 592 '' is not marked as EOG load: control token: 409 '' is not marked as EOG load: control token: 231 '' is not marked as EOG load: control token: 607 '' is not marked as EOG load: control token: 7 '[TOOL_RESULTS]' is not marked as EOG load: control token: 138 '' is not marked as EOG load: control token: 455 '' is not marked as EOG load: control token: 801 '' is not marked as EOG load: control token: 288 '' is not marked as EOG load: control token: 619 '' is not marked as EOG load: control token: 133 '' is not marked as EOG load: control token: 827 '' is not marked as EOG load: control token: 771 '' is not marked as EOG load: control token: 723 '' is not marked as EOG load: control token: 494 '' is not marked as EOG load: control token: 444 '' is not marked as EOG load: control token: 422 '' is not marked as EOG load: control token: 441 '' is not marked as EOG load: control token: 372 '' is not marked as EOG load: control token: 874 '' is not marked as EOG load: control token: 137 '' is not marked as EOG load: control token: 250 '' is not marked as EOG load: control token: 840 '' is not marked as EOG load: control token: 846 '' is not marked as EOG load: control token: 465 '' is not marked as EOG load: control token: 425 '' is not marked as EOG load: control token: 984 '' is not marked as EOG load: control token: 328 '' is not marked as EOG load: control token: 186 '' is not marked as EOG load: control token: 973 '' is not marked as EOG load: control token: 394 '' is not marked as EOG load: control token: 522 '' is not marked as EOG load: control token: 385 '' is not marked as EOG load: control token: 496 '' is not marked as EOG load: control token: 498 '' is not marked as EOG load: control token: 489 '' is not marked as EOG load: control token: 767 '' is not marked as EOG load: control token: 916 '' is not marked as EOG load: control token: 818 '' is not marked as EOG load: control token: 894 '' is not marked as EOG load: control token: 668 '' is not marked as EOG load: control token: 566 '' is not marked as EOG load: control token: 898 '' is not marked as EOG load: control token: 490 '' is not marked as EOG load: control token: 977 '' is not marked as EOG load: control token: 900 '' is not marked as EOG load: control token: 541 '' is not marked as EOG load: control token: 21 '' is not marked as EOG load: control token: 787 '' is not marked as EOG load: control token: 722 '' is not marked as EOG load: control token: 803 '' is not marked as EOG load: control token: 210 '' is not marked as EOG load: control token: 205 '' is not marked as EOG load: control token: 485 '' is not marked as EOG load: control token: 40 '' is not marked as EOG load: control token: 57 '' is not marked as EOG load: control token: 115 '' is not marked as EOG load: control token: 798 '' is not marked as EOG load: control token: 700 '' is not marked as EOG load: control token: 961 '' is not marked as EOG load: control token: 101 '' is not marked as EOG load: control token: 279 '' is not marked as EOG load: control token: 139 '' is not marked as EOG load: control token: 903 '' is not marked as EOG load: control token: 625 '' is not marked as EOG load: control token: 147 '' is not marked as EOG load: control token: 837 '' is not marked as EOG load: control token: 438 '' is not marked as EOG load: control token: 500 '' is not marked as EOG load: control token: 371 '' is not marked as EOG load: control token: 31 '' is not marked as EOG load: control token: 27 '' is not marked as EOG load: control token: 948 '' is not marked as EOG load: control token: 72 '' is not marked as EOG load: control token: 53 '' is not marked as EOG load: control token: 70 '' is not marked as EOG load: control token: 737 '' is not marked as EOG load: control token: 864 '' is not marked as EOG load: control token: 623 '' is not marked as EOG load: control token: 421 '' is not marked as EOG load: control token: 431 '' is not marked as EOG load: control token: 589 '' is not marked as EOG load: control token: 926 '' is not marked as EOG load: control token: 743 '' is not marked as EOG load: control token: 484 '' is not marked as EOG load: control token: 721 '' is not marked as EOG load: control token: 940 '' is not marked as EOG load: control token: 971 '' is not marked as EOG load: control token: 136 '' is not marked as EOG load: control token: 445 '' is not marked as EOG load: control token: 142 '' is not marked as EOG load: control token: 934 '' is not marked as EOG load: control token: 316 '' is not marked as EOG load: control token: 690 '' is not marked as EOG load: control token: 887 '' is not marked as EOG load: control token: 531 '' is not marked as EOG load: control token: 482 '' is not marked as EOG load: control token: 560 '' is not marked as EOG load: control token: 333 '' is not marked as EOG load: control token: 574 '' is not marked as EOG load: control token: 734 '' is not marked as EOG load: control token: 491 '' is not marked as EOG load: control token: 199 '' is not marked as EOG load: control token: 449 '' is not marked as EOG load: control token: 704 '' is not marked as EOG load: control token: 618 '' is not marked as EOG load: control token: 516 '' is not marked as EOG load: control token: 661 '' is not marked as EOG load: control token: 459 '' is not marked as EOG load: control token: 134 '' is not marked as EOG load: control token: 708 '' is not marked as EOG load: control token: 426 '' is not marked as EOG load: control token: 727 '' is not marked as EOG load: control token: 506 '' is not marked as EOG load: control token: 265 '' is not marked as EOG load: control token: 861 '' is not marked as EOG load: control token: 988 '' is not marked as EOG load: control token: 189 '' is not marked as EOG load: control token: 367 '' is not marked as EOG load: control token: 118 '' is not marked as EOG load: control token: 526 '' is not marked as EOG load: control token: 527 '' is not marked as EOG load: control token: 448 '' is not marked as EOG load: control token: 86 '' is not marked as EOG load: control token: 554 '' is not marked as EOG load: control token: 895 '' is not marked as EOG load: control token: 608 '' is not marked as EOG load: control token: 681 '' is not marked as EOG load: control token: 403 '' is not marked as EOG load: control token: 503 '' is not marked as EOG load: control token: 20 '' is not marked as EOG load: control token: 797 '' is not marked as EOG load: control token: 476 '' is not marked as EOG load: control token: 359 '' is not marked as EOG load: control token: 909 '' is not marked as EOG load: control token: 323 '' is not marked as EOG load: control token: 435 '' is not marked as EOG load: control token: 43 '' is not marked as EOG load: control token: 970 '' is not marked as EOG load: control token: 175 '' is not marked as EOG load: control token: 300 '' is not marked as EOG load: control token: 505 '' is not marked as EOG load: control token: 158 '' is not marked as EOG load: control token: 890 '' is not marked as EOG load: control token: 855 '' is not marked as EOG load: control token: 182 '' is not marked as EOG load: control token: 312 '' is not marked as EOG load: control token: 688 '' is not marked as EOG load: control token: 795 '' is not marked as EOG load: control token: 883 '' is not marked as EOG load: control token: 817 '' is not marked as EOG load: control token: 886 '' is not marked as EOG load: control token: 50 '' is not marked as EOG load: control token: 10 '<|im_start|>' is not marked as EOG load: control token: 314 '' is not marked as EOG load: control token: 519 '' is not marked as EOG load: control token: 989 '' is not marked as EOG load: control token: 653 '' is not marked as EOG load: control token: 779 '' is not marked as EOG load: control token: 377 '' is not marked as EOG load: control token: 473 '' is not marked as EOG load: control token: 61 '' is not marked as EOG load: control token: 165 '' is not marked as EOG load: control token: 745 '' is not marked as EOG load: control token: 963 '' is not marked as EOG load: control token: 775 '' is not marked as EOG load: control token: 450 '' is not marked as EOG load: control token: 396 '' is not marked as EOG load: control token: 587 '' is not marked as EOG load: control token: 418 '' is not marked as EOG load: control token: 414 '' is not marked as EOG load: control token: 859 '' is not marked as EOG load: control token: 218 '' is not marked as EOG load: control token: 252 '' is not marked as EOG load: control token: 55 '' is not marked as EOG load: control token: 802 '' is not marked as EOG load: control token: 249 '' is not marked as EOG load: control token: 130 '' is not marked as EOG load: control token: 34 '' is not marked as EOG load: control token: 834 '' is not marked as EOG load: control token: 660 '' is not marked as EOG load: control token: 638 '' is not marked as EOG load: control token: 380 '' is not marked as EOG load: control token: 411 '' is not marked as EOG load: control token: 62 '' is not marked as EOG load: control token: 719 '' is not marked as EOG load: control token: 340 '' is not marked as EOG load: control token: 939 '' is not marked as EOG load: control token: 891 '' is not marked as EOG load: control token: 184 '' is not marked as EOG load: control token: 714 '' is not marked as EOG load: control token: 19 '' is not marked as EOG load: control token: 794 '' is not marked as EOG load: control token: 736 '' is not marked as EOG load: control token: 509 '' is not marked as EOG load: control token: 983 '' is not marked as EOG load: control token: 720 '' is not marked as EOG load: control token: 862 '' is not marked as EOG load: control token: 829 '' is not marked as EOG load: control token: 925 '' is not marked as EOG load: control token: 698 '' is not marked as EOG load: control token: 451 '' is not marked as EOG load: control token: 570 '' is not marked as EOG load: control token: 214 '' is not marked as EOG load: control token: 144 '' is not marked as EOG load: control token: 561 '' is not marked as EOG load: control token: 582 '' is not marked as EOG load: control token: 523 '' is not marked as EOG load: control token: 65 '' is not marked as EOG load: control token: 311 '' is not marked as EOG load: control token: 693 '' is not marked as EOG load: control token: 857 '' is not marked as EOG load: control token: 513 '' is not marked as EOG load: control token: 271 '' is not marked as EOG load: control token: 919 '' is not marked as EOG load: control token: 542 '' is not marked as EOG load: control token: 594 '' is not marked as EOG load: control token: 338 '' is not marked as EOG load: control token: 32 '' is not marked as EOG load: control token: 955 '' is not marked as EOG load: control token: 969 '' is not marked as EOG load: control token: 460 '' is not marked as EOG load: control token: 518 '' is not marked as EOG load: control token: 588 '' is not marked as EOG load: control token: 701 '' is not marked as EOG load: control token: 319 '' is not marked as EOG load: control token: 320 '' is not marked as EOG load: control token: 233 '' is not marked as EOG load: control token: 277 '' is not marked as EOG load: control token: 585 '' is not marked as EOG load: control token: 213 '' is not marked as EOG load: control token: 599 '' is not marked as EOG load: control token: 291 '' is not marked as EOG load: control token: 583 '' is not marked as EOG load: control token: 160 '' is not marked as EOG load: control token: 109 '' is not marked as EOG load: control token: 164 '' is not marked as EOG load: control token: 436 '' is not marked as EOG load: control token: 461 '' is not marked as EOG load: control token: 982 '' is not marked as EOG load: control token: 621 '' is not marked as EOG load: control token: 303 '' is not marked as EOG load: control token: 207 '' is not marked as EOG load: control token: 654 '' is not marked as EOG load: control token: 868 '' is not marked as EOG load: control token: 437 '' is not marked as EOG load: control token: 641 '' is not marked as EOG load: control token: 200 '' is not marked as EOG load: control token: 741 '' is not marked as EOG load: control token: 738 '' is not marked as EOG load: control token: 786 '' is not marked as EOG load: control token: 82 '' is not marked as EOG load: control token: 234 '' is not marked as EOG load: control token: 563 '' is not marked as EOG load: control token: 358 '' is not marked as EOG load: control token: 497 '' is not marked as EOG load: control token: 296 '' is not marked as EOG load: control token: 652 '' is not marked as EOG load: control token: 756 '' is not marked as EOG load: control token: 170 '' is not marked as EOG load: control token: 495 '' is not marked as EOG load: control token: 176 '' is not marked as EOG load: control token: 89 '' is not marked as EOG load: control token: 724 '' is not marked as EOG load: control token: 251 '' is not marked as EOG load: control token: 238 '' is not marked as EOG load: control token: 913 '' is not marked as EOG load: control token: 766 '' is not marked as EOG load: control token: 826 '' is not marked as EOG load: control token: 547 '' is not marked as EOG load: control token: 374 '' is not marked as EOG load: control token: 514 '' is not marked as EOG load: control token: 950 '' is not marked as EOG load: control token: 598 '' is not marked as EOG load: control token: 850 '' is not marked as EOG load: control token: 908 '' is not marked as EOG load: control token: 517 '' is not marked as EOG load: control token: 679 '' is not marked as EOG load: control token: 578 '' is not marked as EOG load: control token: 75 '' is not marked as EOG load: control token: 462 '' is not marked as EOG load: control token: 456 '' is not marked as EOG load: control token: 979 '' is not marked as EOG load: control token: 446 '' is not marked as EOG load: control token: 917 '' is not marked as EOG load: control token: 549 '' is not marked as EOG load: control token: 183 '' is not marked as EOG load: control token: 424 '' is not marked as EOG load: control token: 419 '' is not marked as EOG load: control token: 413 '' is not marked as EOG load: control token: 119 '' is not marked as EOG load: control token: 66 '' is not marked as EOG load: control token: 478 '' is not marked as EOG load: control token: 398 '' is not marked as EOG load: control token: 386 '' is not marked as EOG load: control token: 440 '' is not marked as EOG load: control token: 202 '' is not marked as EOG load: control token: 747 '' is not marked as EOG load: control token: 825 '' is not marked as EOG load: control token: 997 '' is not marked as EOG load: control token: 369 '' is not marked as EOG load: control token: 696 '' is not marked as EOG load: control token: 512 '' is not marked as EOG load: control token: 349 '' is not marked as EOG load: control token: 985 '' is not marked as EOG load: control token: 337 '' is not marked as EOG load: control token: 135 '' is not marked as EOG load: control token: 283 '' is not marked as EOG load: control token: 776 '' is not marked as EOG load: control token: 327 '' is not marked as EOG load: control token: 610 '' is not marked as EOG load: control token: 752 '' is not marked as EOG load: control token: 510 '' is not marked as EOG load: control token: 92 '' is not marked as EOG load: control token: 69 '' is not marked as EOG load: control token: 703 '' is not marked as EOG load: control token: 284 '' is not marked as EOG load: control token: 325 '' is not marked as EOG load: control token: 228 '' is not marked as EOG load: control token: 355 '' is not marked as EOG load: control token: 389 '' is not marked as EOG load: control token: 458 '' is not marked as EOG load: control token: 264 '' is not marked as EOG load: control token: 335 '' is not marked as EOG load: control token: 181 '' is not marked as EOG load: control token: 263 '' is not marked as EOG load: control token: 671 '' is not marked as EOG load: control token: 63 '' is not marked as EOG load: control token: 261 '' is not marked as EOG load: control token: 760 '' is not marked as EOG load: control token: 399 '' is not marked as EOG load: control token: 692 '' is not marked as EOG load: control token: 956 '' is not marked as EOG load: control token: 400 '' is not marked as EOG load: control token: 83 '' is not marked as EOG load: control token: 79 '' is not marked as EOG load: control token: 564 '' is not marked as EOG load: control token: 537 '' is not marked as EOG load: control token: 959 '' is not marked as EOG load: control token: 910 '' is not marked as EOG load: control token: 285 '' is not marked as EOG load: control token: 524 '' is not marked as EOG load: control token: 565 '' is not marked as EOG load: control token: 744 '' is not marked as EOG load: control token: 255 '' is not marked as EOG load: control token: 866 '' is not marked as EOG load: control token: 538 '' is not marked as EOG load: control token: 280 '' is not marked as EOG load: control token: 815 '' is not marked as EOG load: control token: 204 '' is not marked as EOG load: control token: 475 '' is not marked as EOG load: control token: 480 '' is not marked as EOG load: control token: 166 '' is not marked as EOG load: control token: 562 '' is not marked as EOG load: control token: 47 '' is not marked as EOG load: control token: 471 '' is not marked as EOG load: control token: 486 '' is not marked as EOG load: control token: 244 '' is not marked as EOG load: control token: 301 '' is not marked as EOG load: control token: 2 '' is not marked as EOG load: control token: 580 '' is not marked as EOG load: control token: 246 '' is not marked as EOG load: control token: 52 '' is not marked as EOG load: control token: 295 '' is not marked as EOG load: control token: 994 '' is not marked as EOG load: control token: 90 '' is not marked as EOG load: control token: 122 '' is not marked as EOG load: control token: 370 '' is not marked as EOG load: control token: 124 '' is not marked as EOG load: control token: 644 '' is not marked as EOG load: control token: 357 '' is not marked as EOG load: control token: 275 '' is not marked as EOG load: control token: 488 '' is not marked as EOG load: control token: 492 '' is not marked as EOG load: control token: 944 '' is not marked as EOG load: control token: 45 '' is not marked as EOG load: control token: 262 '' is not marked as EOG load: control token: 276 '' is not marked as EOG load: control token: 757 '' is not marked as EOG load: control token: 305 '' is not marked as EOG load: control token: 683 '' is not marked as EOG load: control token: 382 '' is not marked as EOG load: control token: 432 '' is not marked as EOG load: control token: 831 '' is not marked as EOG load: control token: 824 '' is not marked as EOG load: control token: 454 '' is not marked as EOG load: control token: 107 '' is not marked as EOG load: control token: 46 '' is not marked as EOG load: control token: 140 '' is not marked as EOG load: control token: 352 '' is not marked as EOG load: control token: 871 '' is not marked as EOG load: control token: 470 '' is not marked as EOG load: control token: 209 '' is not marked as EOG load: control token: 71 '' is not marked as EOG load: control token: 807 '' is not marked as EOG load: control token: 68 '' is not marked as EOG load: control token: 678 '' is not marked as EOG load: control token: 511 '' is not marked as EOG load: control token: 49 '' is not marked as EOG load: control token: 938 '' is not marked as EOG load: control token: 499 '' is not marked as EOG load: control token: 557 '' is not marked as EOG load: control token: 171 '' is not marked as EOG load: control token: 584 '' is not marked as EOG load: control token: 915 '' is not marked as EOG load: control token: 699 '' is not marked as EOG load: control token: 838 '' is not marked as EOG load: control token: 18 '' is not marked as EOG load: control token: 749 '' is not marked as EOG load: control token: 206 '' is not marked as EOG load: control token: 710 '' is not marked as EOG load: control token: 155 '' is not marked as EOG load: control token: 247 '' is not marked as EOG load: control token: 732 '' is not marked as EOG load: control token: 208 '' is not marked as EOG load: control token: 365 '' is not marked as EOG load: control token: 321 '' is not marked as EOG load: control token: 777 '' is not marked as EOG load: control token: 640 '' is not marked as EOG load: control token: 515 '' is not marked as EOG load: control token: 194 '' is not marked as EOG load: control token: 378 '' is not marked as EOG load: control token: 104 '' is not marked as EOG load: control token: 770 '' is not marked as EOG load: control token: 479 '' is not marked as EOG load: control token: 145 '' is not marked as EOG load: control token: 151 '' is not marked as EOG load: control token: 854 '' is not marked as EOG load: control token: 929 '' is not marked as EOG load: control token: 156 '' is not marked as EOG load: control token: 302 '' is not marked as EOG load: control token: 88 '' is not marked as EOG load: control token: 393 '' is not marked as EOG load: control token: 590 '' is not marked as EOG load: control token: 763 '' is not marked as EOG load: control token: 178 '' is not marked as EOG load: control token: 609 '' is not marked as EOG load: control token: 353 '' is not marked as EOG load: control token: 379 '' is not marked as EOG load: control token: 931 '' is not marked as EOG load: control token: 222 '' is not marked as EOG load: control token: 870 '' is not marked as EOG load: control token: 501 '' is not marked as EOG load: control token: 682 '' is not marked as EOG load: control token: 126 '' is not marked as EOG load: control token: 341 '' is not marked as EOG load: control token: 597 '' is not marked as EOG load: control token: 892 '' is not marked as EOG load: control token: 705 '' is not marked as EOG load: control token: 774 '' is not marked as EOG load: control token: 217 '' is not marked as EOG load: control token: 121 '' is not marked as EOG load: control token: 230 '' is not marked as EOG load: control token: 848 '' is not marked as EOG load: control token: 240 '' is not marked as EOG load: control token: 336 '' is not marked as EOG load: control token: 274 '' is not marked as EOG load: control token: 937 '' is not marked as EOG load: control token: 350 '' is not marked as EOG load: control token: 439 '' is not marked as EOG load: control token: 120 '' is not marked as EOG load: control token: 535 '' is not marked as EOG load: control token: 504 '' is not marked as EOG load: control token: 662 '' is not marked as EOG load: control token: 141 '' is not marked as EOG load: control token: 193 '' is not marked as EOG load: control token: 77 '' is not marked as EOG load: control token: 999 '' is not marked as EOG load: control token: 157 '' is not marked as EOG load: control token: 568 '' is not marked as EOG load: control token: 39 '' is not marked as EOG load: control token: 579 '' is not marked as EOG load: control token: 581 '' is not marked as EOG load: control token: 324 '' is not marked as EOG load: control token: 586 '' is not marked as EOG load: control token: 595 '' is not marked as EOG load: control token: 602 '' is not marked as EOG load: control token: 972 '' is not marked as EOG load: control token: 632 '' is not marked as EOG load: control token: 634 '' is not marked as EOG load: control token: 615 '' is not marked as EOG load: control token: 117 '' is not marked as EOG load: control token: 221 '' is not marked as EOG load: control token: 811 '' is not marked as EOG load: control token: 197 '' is not marked as EOG load: control token: 457 '' is not marked as EOG load: control token: 617 '' is not marked as EOG load: control token: 712 '' is not marked as EOG load: control token: 839 '' is not marked as EOG load: control token: 643 '' is not marked as EOG load: control token: 902 '' is not marked as EOG load: control token: 553 '' is not marked as EOG load: control token: 656 '' is not marked as EOG load: control token: 672 '' is not marked as EOG load: control token: 773 '' is not marked as EOG load: control token: 675 '' is not marked as EOG load: control token: 361 '' is not marked as EOG load: control token: 673 '' is not marked as EOG load: control token: 800 '' is not marked as EOG load: control token: 691 '' is not marked as EOG load: control token: 694 '' is not marked as EOG load: control token: 604 '' is not marked as EOG load: control token: 717 '' is not marked as EOG load: control token: 442 '' is not marked as EOG load: control token: 93 '' is not marked as EOG load: control token: 129 '' is not marked as EOG load: control token: 315 '' is not marked as EOG load: control token: 406 '' is not marked as EOG load: control token: 987 '' is not marked as EOG load: control token: 828 '' is not marked as EOG load: control token: 368 '' is not marked as EOG load: control token: 904 '' is not marked as EOG load: control token: 930 '' is not marked as EOG load: control token: 317 '' is not marked as EOG load: control token: 792 '' is not marked as EOG load: control token: 91 '' is not marked as EOG load: control token: 814 '' is not marked as EOG load: control token: 796 '' is not marked as EOG load: control token: 799 '' is not marked as EOG load: control token: 664 '' is not marked as EOG load: control token: 951 '' is not marked as EOG load: control token: 888 '' is not marked as EOG load: control token: 806 '' is not marked as EOG load: control token: 809 '' is not marked as EOG load: control token: 810 '' is not marked as EOG load: control token: 819 '' is not marked as EOG load: control token: 628 '' is not marked as EOG load: control token: 58 '' is not marked as EOG load: control token: 392 '' is not marked as EOG load: control token: 899 '' is not marked as EOG load: control token: 896 '' is not marked as EOG load: control token: 612 '' is not marked as EOG load: control token: 154 '' is not marked as EOG load: control token: 630 '' is not marked as EOG load: control token: 856 '' is not marked as EOG load: control token: 666 '' is not marked as EOG load: control token: 860 '' is not marked as EOG load: control token: 863 '' is not marked as EOG load: control token: 788 '' is not marked as EOG load: control token: 865 '' is not marked as EOG load: control token: 872 '' is not marked as EOG load: control token: 974 '' is not marked as EOG load: control token: 873 '' is not marked as EOG load: control token: 543 '' is not marked as EOG load: control token: 9 '[TOOL_CALLS]' is not marked as EOG load: control token: 665 '' is not marked as EOG load: control token: 354 '' is not marked as EOG load: control token: 243 '' is not marked as EOG load: control token: 637 '' is not marked as EOG load: control token: 735 '' is not marked as EOG load: control token: 901 '' is not marked as EOG load: control token: 363 '' is not marked as EOG load: control token: 821 '' is not marked as EOG load: control token: 41 '' is not marked as EOG load: control token: 920 '' is not marked as EOG load: control token: 928 '' is not marked as EOG load: control token: 220 '' is not marked as EOG load: control token: 942 '' is not marked as EOG load: control token: 947 '' is not marked as EOG load: control token: 556 '' is not marked as EOG load: control token: 962 '' is not marked as EOG load: control token: 889 '' is not marked as EOG load: control token: 235 '' is not marked as EOG load: control token: 927 '' is not marked as EOG load: control token: 967 '' is not marked as EOG load: control token: 968 '' is not marked as EOG load: control token: 78 '' is not marked as EOG load: control token: 980 '' is not marked as EOG load: control token: 223 '' is not marked as EOG load: control token: 360 '' is not marked as EOG load: control token: 822 '' is not marked as EOG load: control token: 342 '' is not marked as EOG load: control token: 294 '' is not marked as EOG load: control token: 715 '' is not marked as EOG load: control token: 187 '' is not marked as EOG load: control token: 248 '' is not marked as EOG load: control token: 481 '' is not marked as EOG load: control token: 548 '' is not marked as EOG load: control token: 110 '' is not marked as EOG load: control token: 687 '' is not marked as EOG load: control token: 593 '' is not marked as EOG load: control token: 259 '' is not marked as EOG load: control token: 686 '' is not marked as EOG load: control token: 188 '' is not marked as EOG load: control token: 229 '' is not marked as EOG load: control token: 24 '' is not marked as EOG load: control token: 131 '' is not marked as EOG load: control token: 253 '' is not marked as EOG load: control token: 318 '' is not marked as EOG load: control token: 633 '' is not marked as EOG load: control token: 716 '' is not marked as EOG load: control token: 685 '' is not marked as EOG load: control token: 224 '' is not marked as EOG load: control token: 180 '' is not marked as EOG load: control token: 572 '' is not marked as EOG load: control token: 463 '' is not marked as EOG load: control token: 689 '' is not marked as EOG load: control token: 0 '' is not marked as EOG load: control token: 758 '' is not marked as EOG load: control token: 674 '' is not marked as EOG load: control token: 329 '' is not marked as EOG load: control token: 525 '' is not marked as EOG load: control token: 25 '' is not marked as EOG load: control token: 573 '' is not marked as EOG load: control token: 833 '' is not marked as EOG load: control token: 847 '' is not marked as EOG load: control token: 334 '' is not marked as EOG load: control token: 289 '' is not marked as EOG load: control token: 923 '' is not marked as EOG load: control token: 64 '' is not marked as EOG load: control token: 784 '' is not marked as EOG load: control token: 791 '' is not marked as EOG load: control token: 404 '' is not marked as EOG load: control token: 952 '' is not marked as EOG load: control token: 8 '[/TOOL_RESULTS]' is not marked as EOG load: control token: 880 '' is not marked as EOG load: control token: 981 '' is not marked as EOG load: control token: 551 '' is not marked as EOG load: control token: 343 '' is not marked as EOG load: control token: 726 '' is not marked as EOG load: control token: 620 '' is not marked as EOG load: control token: 832 '' is not marked as EOG load: control token: 405 '' is not marked as EOG load: control token: 534 '' is not marked as EOG load: control token: 467 '' is not marked as EOG load: control token: 391 '' is not marked as EOG load: control token: 540 '' is not marked as EOG load: control token: 911 '' is not marked as EOG load: control token: 626 '' is not marked as EOG load: control token: 782 '' is not marked as EOG load: control token: 232 '' is not marked as EOG load: control token: 362 '' is not marked as EOG load: control token: 835 '' is not marked as EOG load: control token: 867 '' is not marked as EOG load: control token: 100 '' is not marked as EOG load: control token: 417 '' is not marked as EOG load: control token: 267 '' is not marked as EOG load: control token: 918 '' is not marked as EOG load: control token: 98 '' is not marked as EOG load: control token: 22 '' is not marked as EOG load: control token: 306 '' is not marked as EOG load: control token: 366 '' is not marked as EOG load: control token: 290 '' is not marked as EOG load: control token: 844 '' is not marked as EOG load: control token: 769 '' is not marked as EOG load: control token: 885 '' is not marked as EOG load: control token: 711 '' is not marked as EOG load: control token: 709 '' is not marked as EOG load: control token: 663 '' is not marked as EOG load: control token: 60 '' is not marked as EOG load: control token: 869 '' is not marked as EOG load: control token: 298 '' is not marked as EOG load: control token: 635 '' is not marked as EOG load: control token: 729 '' is not marked as EOG load: control token: 245 '' is not marked as EOG load: control token: 933 '' is not marked as EOG load: control token: 627 '' is not marked as EOG load: control token: 215 '' is not marked as EOG load: control token: 820 '' is not marked as EOG load: control token: 881 '' is not marked as EOG load: control token: 308 '' is not marked as EOG load: control token: 94 '' is not marked as EOG load: control token: 219 '' is not marked as EOG load: control token: 51 '' is not marked as EOG load: control token: 177 '' is not marked as EOG load: control token: 348 '' is not marked as EOG load: control token: 239 '' is not marked as EOG load: control token: 772 '' is not marked as EOG load: control token: 173 '' is not marked as EOG load: control token: 373 '' is not marked as EOG load: control token: 174 '' is not marked as EOG load: control token: 852 '' is not marked as EOG load: control token: 150 '' is not marked as EOG load: control token: 559 '' is not marked as EOG load: control token: 169 '' is not marked as EOG load: control token: 539 '' is not marked as EOG load: control token: 383 '' is not marked as EOG load: control token: 614 '' is not marked as EOG load: control token: 943 '' is not marked as EOG load: control token: 395 '' is not marked as EOG load: control token: 953 '' is not marked as EOG load: control token: 843 '' is not marked as EOG load: control token: 986 '' is not marked as EOG load: control token: 36 '' is not marked as EOG load: control token: 762 '' is not marked as EOG load: control token: 530 '' is not marked as EOG load: control token: 464 '' is not marked as EOG load: control token: 879 '' is not marked as EOG load: control token: 401 '' is not marked as EOG load: control token: 73 '' is not marked as EOG load: control token: 992 '' is not marked as EOG load: control token: 447 '' is not marked as EOG load: control token: 402 '' is not marked as EOG load: control token: 849 '' is not marked as EOG load: control token: 307 '' is not marked as EOG load: control token: 179 '' is not marked as EOG load: control token: 781 '' is not marked as EOG load: control token: 991 '' is not marked as EOG load: control token: 356 '' is not marked as EOG load: control token: 273 '' is not marked as EOG load: control token: 85 '' is not marked as EOG load: control token: 407 '' is not marked as EOG load: control token: 493 '' is not marked as EOG load: control token: 487 '' is not marked as EOG load: control token: 332 '' is not marked as EOG load: control token: 364 '' is not marked as EOG load: control token: 976 '' is not marked as EOG load: control token: 427 '' is not marked as EOG load: control token: 751 '' is not marked as EOG load: control token: 415 '' is not marked as EOG load: control token: 966 '' is not marked as EOG load: control token: 203 '' is not marked as EOG load: control token: 949 '' is not marked as EOG load: control token: 59 '' is not marked as EOG load: control token: 81 '' is not marked as EOG load: control token: 805 '' is not marked as EOG load: control token: 430 '' is not marked as EOG load: control token: 477 '' is not marked as EOG load: control token: 922 '' is not marked as EOG load: control token: 148 '' is not marked as EOG load: control token: 452 '' is not marked as EOG load: control token: 748 '' is not marked as EOG load: control token: 272 '' is not marked as EOG load: control token: 201 '' is not marked as EOG load: control token: 924 '' is not marked as EOG load: control token: 412 '' is not marked as EOG load: control token: 921 '' is not marked as EOG load: control token: 529 '' is not marked as EOG load: control token: 761 '' is not marked as EOG load: control token: 941 '' is not marked as EOG load: control token: 281 '' is not marked as EOG load: control token: 344 '' is not marked as EOG load: control token: 577 '' is not marked as EOG load: control token: 936 '' is not marked as EOG load: control token: 642 '' is not marked as EOG load: control token: 433 '' is not marked as EOG load: control token: 227 '' is not marked as EOG load: control token: 269 '' is not marked as EOG load: control token: 242 '' is not marked as EOG load: control token: 841 '' is not marked as EOG load: control token: 381 '' is not marked as EOG load: control token: 468 '' is not marked as EOG load: control token: 649 '' is not marked as EOG load: control token: 884 '' is not marked as EOG load: control token: 964 '' is not marked as EOG load: control token: 739 '' is not marked as EOG load: control token: 945 '' is not marked as EOG load: control token: 428 '' is not marked as EOG load: control token: 755 '' is not marked as EOG load: control token: 878 '' is not marked as EOG load: control token: 651 '' is not marked as EOG load: control token: 346 '' is not marked as EOG load: control token: 48 '' is not marked as EOG load: control token: 804 '' is not marked as EOG load: control token: 309 '' is not marked as EOG load: control token: 127 '' is not marked as EOG load: control token: 957 '' is not marked as EOG load: control token: 387 '' is not marked as EOG load: control token: 785 '' is not marked as EOG load: control token: 30 '' is not marked as EOG load: control token: 185 '' is not marked as EOG load: control token: 611 '' is not marked as EOG load: control token: 434 '' is not marked as EOG load: control token: 546 '' is not marked as EOG load: control token: 897 '' is not marked as EOG load: control token: 946 '' is not marked as EOG load: control token: 746 '' is not marked as EOG load: control token: 552 '' is not marked as EOG load: control token: 750 '' is not marked as EOG load: control token: 677 '' is not marked as EOG load: control token: 605 '' is not marked as EOG load: control token: 159 '' is not marked as EOG load: control token: 631 '' is not marked as EOG load: control token: 906 '' is not marked as EOG load: control token: 96 '' is not marked as EOG load: control token: 35 '' is not marked as EOG load: control token: 161 '' is not marked as EOG load: control token: 42 '' is not marked as EOG load: control token: 958 '' is not marked as EOG load: control token: 80 '' is not marked as EOG load: control token: 429 '' is not marked as EOG load: control token: 684 '' is not marked as EOG load: control token: 707 '' is not marked as EOG load: control token: 993 '' is not marked as EOG load: control token: 645 '' is not marked as EOG load: control token: 330 '' is not marked as EOG load: control token: 420 '' is not marked as EOG load: control token: 613 '' is not marked as EOG load: control token: 105 '' is not marked as EOG load: control token: 842 '' is not marked as EOG load: control token: 816 '' is not marked as EOG load: control token: 397 '' is not marked as EOG load: control token: 616 '' is not marked as EOG load: control token: 236 '' is not marked as EOG load: control token: 823 '' is not marked as EOG load: control token: 954 '' is not marked as EOG load: control token: 650 '' is not marked as EOG load: control token: 960 '' is not marked as EOG load: control token: 695 '' is not marked as EOG load: control token: 375 '' is not marked as EOG load: control token: 1 '' is not marked as EOG load: control token: 146 '' is not marked as EOG load: control token: 914 '' is not marked as EOG load: control token: 38 '' is not marked as EOG load: control token: 990 '' is not marked as EOG load: control token: 753 '' is not marked as EOG load: control token: 907 '' is not marked as EOG load: control token: 211 '' is not marked as EOG load: control token: 29 '' is not marked as EOG load: control token: 622 '' is not marked as EOG load: control token: 87 '' is not marked as EOG load: control token: 172 '' is not marked as EOG load: control token: 191 '' is not marked as EOG load: control token: 778 '' is not marked as EOG load: control token: 713 '' is not marked as EOG load: control token: 647 '' is not marked as EOG load: control token: 347 '' is not marked as EOG load: control token: 237 '' is not marked as EOG load: control token: 190 '' is not marked as EOG load: control token: 536 '' is not marked as EOG load: control token: 4 '[/INST]' is not marked as EOG load: control token: 893 '' is not marked as EOG load: control token: 569 '' is not marked as EOG load: control token: 54 '' is not marked as EOG load: control token: 790 '' is not marked as EOG load: control token: 667 '' is not marked as EOG load: control token: 123 '' is not marked as EOG load: control token: 975 '' is not marked as EOG load: control token: 808 '' is not marked as EOG load: control token: 600 '' is not marked as EOG load: control token: 912 '' is not marked as EOG load: control token: 520 '' is not marked as EOG load: control token: 740 '' is not marked as EOG load: control token: 132 '' is not marked as EOG load: control token: 659 '' is not marked as EOG load: control token: 508 '' is not marked as EOG load: control token: 658 '' is not marked as EOG load: control token: 780 '' is not marked as EOG load: control token: 260 '' is not marked as EOG load: control token: 567 '' is not marked as EOG load: control token: 195 '' is not marked as EOG load: control token: 282 '' is not marked as EOG load: control token: 278 '' is not marked as EOG load: control token: 453 '' is not marked as EOG load: control token: 532 '' is not marked as EOG load: control token: 28 '' is not marked as EOG load: control token: 697 '' is not marked as EOG load: control token: 502 '' is not marked as EOG load: control token: 226 '' is not marked as EOG load: control token: 858 '' is not marked as EOG load: control token: 836 '' is not marked as EOG load: control token: 636 '' is not marked as EOG load: control token: 116 '' is not marked as EOG load: control token: 676 '' is not marked as EOG load: control token: 466 '' is not marked as EOG load: control token: 521 '' is not marked as EOG load: control token: 483 '' is not marked as EOG load: control token: 718 '' is not marked as EOG load: control token: 764 '' is not marked as EOG load: control token: 978 '' is not marked as EOG load: control token: 624 '' is not marked as EOG load: control token: 388 '' is not marked as EOG load: control token: 266 '' is not marked as EOG load: control token: 670 '' is not marked as EOG load: control token: 875 '' is not marked as EOG load: control token: 550 '' is not marked as EOG load: control token: 384 '' is not marked as EOG load: control token: 759 '' is not marked as EOG load: control token: 254 '' is not marked as EOG load: control token: 443 '' is not marked as EOG load: control token: 655 '' is not marked as EOG load: control token: 996 '' is not marked as EOG load: control token: 558 '' is not marked as EOG load: control token: 339 '' is not marked as EOG load: control token: 830 '' is not marked as EOG load: control token: 528 '' is not marked as EOG load: control token: 768 '' is not marked as EOG load: control token: 591 '' is not marked as EOG load: control token: 876 '' is not marked as EOG load: control token: 730 '' is not marked as EOG load: control token: 571 '' is not marked as EOG load: control token: 629 '' is not marked as EOG load: control token: 270 '' is not marked as EOG load: control token: 742 '' is not marked as EOG load: control token: 783 '' is not marked as EOG load: control token: 258 '' is not marked as EOG load: control token: 67 '' is not marked as EOG load: control token: 575 '' is not marked as EOG load: control token: 212 '' is not marked as EOG load: control token: 3 '[INST]' is not marked as EOG load: control token: 813 '' is not marked as EOG load: control token: 143 '' is not marked as EOG load: control token: 149 '' is not marked as EOG load: control token: 192 '' is not marked as EOG load: control token: 669 '' is not marked as EOG load: control token: 648 '' is not marked as EOG load: control token: 932 '' is not marked as EOG load: control token: 765 '' is not marked as EOG load: control token: 113 '' is not marked as EOG load: control token: 33 '' is not marked as EOG load: control token: 533 '' is not marked as EOG load: control token: 103 '' is not marked as EOG load: control token: 544 '' is not marked as EOG load: control token: 112 '' is not marked as EOG load: control token: 56 '' is not marked as EOG load: control token: 114 '' is not marked as EOG load: control token: 163 '' is not marked as EOG load: control token: 108 '' is not marked as EOG load: control token: 310 '' is not marked as EOG load: control token: 606 '' is not marked as EOG load: control token: 410 '' is not marked as EOG load: control token: 297 '' is not marked as EOG load: control token: 256 '' is not marked as EOG load: control token: 851 '' is not marked as EOG load: control token: 812 '' is not marked as EOG load: control token: 754 '' is not marked as EOG load: control token: 225 '' is not marked as EOG load: control token: 416 '' is not marked as EOG load: control token: 23 '' is not marked as EOG load: control token: 733 '' is not marked as EOG load: control token: 268 '' is not marked as EOG load: control token: 576 '' is not marked as EOG load: control token: 596 '' is not marked as EOG load: control token: 326 '' is not marked as EOG load: control token: 128 '' is not marked as EOG load: control token: 507 '' is not marked as EOG load: control token: 293 '' is not marked as EOG load: control token: 728 '' is not marked as EOG load: control token: 292 '' is not marked as EOG load: control token: 472 '' is not marked as EOG load: control token: 84 '' is not marked as EOG load: control token: 99 '' is not marked as EOG load: control token: 390 '' is not marked as EOG load: control token: 26 '' is not marked as EOG load: control token: 74 '' is not marked as EOG load: control token: 995 '' is not marked as EOG load: control token: 162 '' is not marked as EOG load: control token: 877 '' is not marked as EOG load: control token: 603 '' is not marked as EOG load: control token: 198 '' is not marked as EOG load: control token: 657 '' is not marked as EOG load: control token: 167 '' is not marked as EOG load: control token: 998 '' is not marked as EOG load: control token: 789 '' is not marked as EOG load: control token: 97 '' is not marked as EOG load: control token: 152 '' is not marked as EOG load: control token: 725 '' is not marked as EOG load: control token: 545 '' is not marked as EOG load: control token: 351 '' is not marked as EOG load: control token: 345 '' is not marked as EOG load: control token: 111 '' is not marked as EOG load: control token: 286 '' is not marked as EOG load: control token: 331 '' is not marked as EOG load: control token: 376 '' is not marked as EOG load: control token: 731 '' is not marked as EOG load: control token: 935 '' is not marked as EOG load: control token: 196 '' is not marked as EOG load: control token: 965 '' is not marked as EOG load: control token: 469 '' is not marked as EOG load: control token: 882 '' is not marked as EOG load: control token: 106 '' is not marked as EOG load: control token: 322 '' is not marked as EOG load: control token: 639 '' is not marked as EOG load: control token: 287 '' is not marked as EOG load: control token: 853 '' is not marked as EOG load: control token: 153 '' is not marked as EOG load: control token: 5 '[AVAILABLE_TOOLS]' is not marked as EOG load: control token: 44 '' is not marked as EOG load: control token: 313 '' is not marked as EOG load: control token: 680 '' is not marked as EOG load: control token: 793 '' is not marked as EOG load: control token: 702 '' is not marked as EOG load: control token: 102 '' is not marked as EOG load: control token: 299 '' is not marked as EOG load: control token: 76 '' is not marked as EOG load: printing all EOG tokens: load: - 11 ('<|im_end|>') load: special tokens cache size = 1000 load: token to piece cache size = 0.8499 MB print_info: arch = nemotron_h_moe print_info: vocab_only = 0 print_info: no_alloc = 0 print_info: n_ctx_train = 1048576 print_info: n_embd = 2688 print_info: n_embd_inp = 2688 print_info: n_layer = 52 print_info: n_head = 32 print_info: n_head_kv = [0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0] print_info: n_rot = 84 print_info: n_swa = 0 print_info: is_swa_any = 0 print_info: n_embd_head_k = 128 print_info: n_embd_head_v = 128 print_info: n_gqa = [0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0] print_info: n_embd_k_gqa = [0, 0, 0, 0, 0, 256, 0, 0, 0, 0, 0, 0, 256, 0, 0, 0, 0, 0, 0, 256, 0, 0, 0, 0, 0, 0, 256, 0, 0, 0, 0, 0, 0, 256, 0, 0, 0, 0, 0, 0, 0, 0, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0] print_info: n_embd_v_gqa = [0, 0, 0, 0, 0, 256, 0, 0, 0, 0, 0, 0, 256, 0, 0, 0, 0, 0, 0, 256, 0, 0, 0, 0, 0, 0, 256, 0, 0, 0, 0, 0, 0, 256, 0, 0, 0, 0, 0, 0, 0, 0, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0] print_info: f_norm_eps = 0.0e+00 print_info: f_norm_rms_eps = 1.0e-05 print_info: f_clamp_kqv = 0.0e+00 print_info: f_max_alibi_bias = 0.0e+00 print_info: f_logit_scale = 0.0e+00 print_info: f_attn_scale = 0.0e+00 print_info: n_ff = [0, 1856, 0, 1856, 0, 0, 1856, 0, 1856, 0, 1856, 0, 0, 1856, 0, 1856, 0, 1856, 0, 0, 1856, 0, 1856, 0, 1856, 0, 0, 1856, 0, 1856, 0, 1856, 0, 0, 1856, 0, 1856, 0, 1856, 0, 1856, 0, 0, 1856, 0, 1856, 0, 1856, 0, 1856, 0, 1856] print_info: n_expert = 128 print_info: n_expert_used = 6 print_info: n_expert_groups = 1 print_info: n_group_used = 1 print_info: causal attn = 1 print_info: pooling type = 0 print_info: rope type = -1 print_info: rope scaling = linear print_info: freq_base_train = 10000.0 print_info: freq_scale_train = 1 print_info: n_ctx_orig_yarn = 1048576 print_info: rope_yarn_log_mul= 0.0000 print_info: rope_finetuned = unknown print_info: ssm_d_conv = 4 print_info: ssm_d_inner = 4096 print_info: ssm_d_state = 128 print_info: ssm_dt_rank = 64 print_info: ssm_n_group = 8 print_info: ssm_dt_b_c_rms = 0 print_info: model type = 31B.A3.5B print_info: model params = 31.58 B print_info: general.name = Nano v3 Llm_Vv1.0.1 Ga print_info: f_embedding_scale = 0.000000 print_info: f_residual_scale = 0.000000 print_info: f_attention_scale = 0.000000 print_info: n_ff_shexp = 3712 print_info: vocab type = BPE print_info: n_vocab = 131072 print_info: n_merges = 269443 print_info: BOS token = 1 '' print_info: EOS token = 11 '<|im_end|>' print_info: EOT token = 11 '<|im_end|>' print_info: UNK token = 0 '' print_info: PAD token = 0 '' print_info: LF token = 1010 'Ċ' print_info: EOG token = 11 '<|im_end|>' print_info: max token length = 150 load_tensors: loading model tensors, this can take a while... (mmap = true) load_tensors: layer 0 assigned to device CUDA0, is_swa = 0 load_tensors: layer 1 assigned to device CUDA0, is_swa = 0 load_tensors: layer 2 assigned to device CUDA0, is_swa = 0 load_tensors: layer 3 assigned to device CUDA0, is_swa = 0 load_tensors: layer 4 assigned to device CUDA0, is_swa = 0 load_tensors: layer 5 assigned to device CUDA0, is_swa = 0 load_tensors: layer 6 assigned to device CUDA0, is_swa = 0 load_tensors: layer 7 assigned to device CUDA0, is_swa = 0 load_tensors: layer 8 assigned to device CUDA0, is_swa = 0 load_tensors: layer 9 assigned to device CUDA0, is_swa = 0 load_tensors: layer 10 assigned to device CUDA0, is_swa = 0 load_tensors: layer 11 assigned to device CUDA0, is_swa = 0 load_tensors: layer 12 assigned to device CUDA0, is_swa = 0 load_tensors: layer 13 assigned to device CUDA0, is_swa = 0 load_tensors: layer 14 assigned to device CUDA0, is_swa = 0 load_tensors: layer 15 assigned to device CUDA0, is_swa = 0 load_tensors: layer 16 assigned to device CUDA0, is_swa = 0 load_tensors: layer 17 assigned to device CUDA0, is_swa = 0 load_tensors: layer 18 assigned to device CUDA0, is_swa = 0 load_tensors: layer 19 assigned to device CUDA0, is_swa = 0 load_tensors: layer 20 assigned to device CUDA0, is_swa = 0 load_tensors: layer 21 assigned to device CUDA0, is_swa = 0 load_tensors: layer 22 assigned to device CUDA0, is_swa = 0 load_tensors: layer 23 assigned to device CUDA0, is_swa = 0 load_tensors: layer 24 assigned to device CUDA0, is_swa = 0 load_tensors: layer 25 assigned to device CUDA0, is_swa = 0 load_tensors: layer 26 assigned to device CUDA0, is_swa = 0 load_tensors: layer 27 assigned to device CUDA0, is_swa = 0 load_tensors: layer 28 assigned to device CUDA0, is_swa = 0 load_tensors: layer 29 assigned to device CUDA0, is_swa = 0 load_tensors: layer 30 assigned to device CUDA0, is_swa = 0 load_tensors: layer 31 assigned to device CUDA0, is_swa = 0 load_tensors: layer 32 assigned to device CUDA0, is_swa = 0 load_tensors: layer 33 assigned to device CUDA0, is_swa = 0 load_tensors: layer 34 assigned to device CUDA0, is_swa = 0 load_tensors: layer 35 assigned to device CUDA0, is_swa = 0 load_tensors: layer 36 assigned to device CUDA0, is_swa = 0 load_tensors: layer 37 assigned to device CUDA0, is_swa = 0 load_tensors: layer 38 assigned to device CUDA0, is_swa = 0 load_tensors: layer 39 assigned to device CUDA0, is_swa = 0 load_tensors: layer 40 assigned to device CUDA0, is_swa = 0 load_tensors: layer 41 assigned to device CUDA0, is_swa = 0 load_tensors: layer 42 assigned to device CUDA0, is_swa = 0 load_tensors: layer 43 assigned to device CUDA0, is_swa = 0 load_tensors: layer 44 assigned to device CUDA0, is_swa = 0 load_tensors: layer 45 assigned to device CUDA0, is_swa = 0 load_tensors: layer 46 assigned to device CUDA0, is_swa = 0 load_tensors: layer 47 assigned to device CUDA0, is_swa = 0 load_tensors: layer 48 assigned to device CUDA0, is_swa = 0 load_tensors: layer 49 assigned to device CUDA0, is_swa = 0 load_tensors: layer 50 assigned to device CUDA0, is_swa = 0 load_tensors: layer 51 assigned to device CUDA0, is_swa = 0 load_tensors: layer 52 assigned to device CUDA0, is_swa = 0 create_tensor: loading tensor token_embd.weight create_tensor: loading tensor output_norm.weight create_tensor: loading tensor output.weight create_tensor: loading tensor blk.0.attn_norm.weight create_tensor: loading tensor blk.0.ssm_in.weight create_tensor: loading tensor blk.0.ssm_conv1d.weight create_tensor: loading tensor blk.0.ssm_conv1d.bias create_tensor: loading tensor blk.0.ssm_dt.bias create_tensor: loading tensor blk.0.ssm_a create_tensor: loading tensor blk.0.ssm_d create_tensor: loading tensor blk.0.ssm_norm.weight create_tensor: loading tensor blk.0.ssm_out.weight create_tensor: loading tensor blk.1.attn_norm.weight create_tensor: loading tensor blk.1.ffn_gate_inp.weight create_tensor: loading tensor blk.1.exp_probs_b.bias create_tensor: loading tensor blk.1.ffn_down_exps.weight create_tensor: loading tensor blk.1.ffn_up_exps.weight create_tensor: loading tensor blk.1.ffn_down_shexp.weight create_tensor: loading tensor blk.1.ffn_up_shexp.weight create_tensor: loading tensor blk.2.attn_norm.weight create_tensor: loading tensor blk.2.ssm_in.weight create_tensor: loading tensor blk.2.ssm_conv1d.weight create_tensor: loading tensor blk.2.ssm_conv1d.bias create_tensor: loading tensor blk.2.ssm_dt.bias create_tensor: loading tensor blk.2.ssm_a create_tensor: loading tensor blk.2.ssm_d create_tensor: loading tensor blk.2.ssm_norm.weight create_tensor: loading tensor blk.2.ssm_out.weight create_tensor: loading tensor blk.3.attn_norm.weight create_tensor: loading tensor blk.3.ffn_gate_inp.weight create_tensor: loading tensor blk.3.exp_probs_b.bias create_tensor: loading tensor blk.3.ffn_down_exps.weight create_tensor: loading tensor blk.3.ffn_up_exps.weight create_tensor: loading tensor blk.3.ffn_down_shexp.weight create_tensor: loading tensor blk.3.ffn_up_shexp.weight create_tensor: loading tensor blk.4.attn_norm.weight create_tensor: loading tensor blk.4.ssm_in.weight create_tensor: loading tensor blk.4.ssm_conv1d.weight create_tensor: loading tensor blk.4.ssm_conv1d.bias create_tensor: loading tensor blk.4.ssm_dt.bias create_tensor: loading tensor blk.4.ssm_a create_tensor: loading tensor blk.4.ssm_d create_tensor: loading tensor blk.4.ssm_norm.weight create_tensor: loading tensor blk.4.ssm_out.weight create_tensor: loading tensor blk.5.attn_norm.weight create_tensor: loading tensor blk.5.attn_q.weight create_tensor: loading tensor blk.5.attn_k.weight create_tensor: loading tensor blk.5.attn_v.weight create_tensor: loading tensor blk.5.attn_output.weight create_tensor: loading tensor blk.6.attn_norm.weight create_tensor: loading tensor blk.6.ffn_gate_inp.weight create_tensor: loading tensor blk.6.exp_probs_b.bias create_tensor: loading tensor blk.6.ffn_down_exps.weight create_tensor: loading tensor blk.6.ffn_up_exps.weight create_tensor: loading tensor blk.6.ffn_down_shexp.weight create_tensor: loading tensor blk.6.ffn_up_shexp.weight create_tensor: loading tensor blk.7.attn_norm.weight create_tensor: loading tensor blk.7.ssm_in.weight create_tensor: loading tensor blk.7.ssm_conv1d.weight create_tensor: loading tensor blk.7.ssm_conv1d.bias create_tensor: loading tensor blk.7.ssm_dt.bias create_tensor: loading tensor blk.7.ssm_a create_tensor: loading tensor blk.7.ssm_d create_tensor: loading tensor blk.7.ssm_norm.weight create_tensor: loading tensor blk.7.ssm_out.weight create_tensor: loading tensor blk.8.attn_norm.weight create_tensor: loading tensor blk.8.ffn_gate_inp.weight create_tensor: loading tensor blk.8.exp_probs_b.bias create_tensor: loading tensor blk.8.ffn_down_exps.weight create_tensor: loading tensor blk.8.ffn_up_exps.weight create_tensor: loading tensor blk.8.ffn_down_shexp.weight create_tensor: loading tensor blk.8.ffn_up_shexp.weight create_tensor: loading tensor blk.9.attn_norm.weight create_tensor: loading tensor blk.9.ssm_in.weight create_tensor: loading tensor blk.9.ssm_conv1d.weight create_tensor: loading tensor blk.9.ssm_conv1d.bias create_tensor: loading tensor blk.9.ssm_dt.bias create_tensor: loading tensor blk.9.ssm_a create_tensor: loading tensor blk.9.ssm_d create_tensor: loading tensor blk.9.ssm_norm.weight create_tensor: loading tensor blk.9.ssm_out.weight create_tensor: loading tensor blk.10.attn_norm.weight create_tensor: loading tensor blk.10.ffn_gate_inp.weight create_tensor: loading tensor blk.10.exp_probs_b.bias create_tensor: loading tensor blk.10.ffn_down_exps.weight create_tensor: loading tensor blk.10.ffn_up_exps.weight create_tensor: loading tensor blk.10.ffn_down_shexp.weight create_tensor: loading tensor blk.10.ffn_up_shexp.weight create_tensor: loading tensor blk.11.attn_norm.weight create_tensor: loading tensor blk.11.ssm_in.weight create_tensor: loading tensor blk.11.ssm_conv1d.weight create_tensor: loading tensor blk.11.ssm_conv1d.bias create_tensor: loading tensor blk.11.ssm_dt.bias create_tensor: loading tensor blk.11.ssm_a create_tensor: loading tensor blk.11.ssm_d create_tensor: loading tensor blk.11.ssm_norm.weight create_tensor: loading tensor blk.11.ssm_out.weight create_tensor: loading tensor blk.12.attn_norm.weight create_tensor: loading tensor blk.12.attn_q.weight create_tensor: loading tensor blk.12.attn_k.weight create_tensor: loading tensor blk.12.attn_v.weight create_tensor: loading tensor blk.12.attn_output.weight create_tensor: loading tensor blk.13.attn_norm.weight create_tensor: loading tensor blk.13.ffn_gate_inp.weight create_tensor: loading tensor blk.13.exp_probs_b.bias create_tensor: loading tensor blk.13.ffn_down_exps.weight create_tensor: loading tensor blk.13.ffn_up_exps.weight create_tensor: loading tensor blk.13.ffn_down_shexp.weight create_tensor: loading tensor blk.13.ffn_up_shexp.weight create_tensor: loading tensor blk.14.attn_norm.weight create_tensor: loading tensor blk.14.ssm_in.weight create_tensor: loading tensor blk.14.ssm_conv1d.weight create_tensor: loading tensor blk.14.ssm_conv1d.bias create_tensor: loading tensor blk.14.ssm_dt.bias create_tensor: loading tensor blk.14.ssm_a create_tensor: loading tensor blk.14.ssm_d create_tensor: loading tensor blk.14.ssm_norm.weight create_tensor: loading tensor blk.14.ssm_out.weight create_tensor: loading tensor blk.15.attn_norm.weight create_tensor: loading tensor blk.15.ffn_gate_inp.weight create_tensor: loading tensor blk.15.exp_probs_b.bias create_tensor: loading tensor blk.15.ffn_down_exps.weight create_tensor: loading tensor blk.15.ffn_up_exps.weight create_tensor: loading tensor blk.15.ffn_down_shexp.weight create_tensor: loading tensor blk.15.ffn_up_shexp.weight create_tensor: loading tensor blk.16.attn_norm.weight create_tensor: loading tensor blk.16.ssm_in.weight create_tensor: loading tensor blk.16.ssm_conv1d.weight create_tensor: loading tensor blk.16.ssm_conv1d.bias create_tensor: loading tensor blk.16.ssm_dt.bias create_tensor: loading tensor blk.16.ssm_a create_tensor: loading tensor blk.16.ssm_d create_tensor: loading tensor blk.16.ssm_norm.weight create_tensor: loading tensor blk.16.ssm_out.weight create_tensor: loading tensor blk.17.attn_norm.weight create_tensor: loading tensor blk.17.ffn_gate_inp.weight create_tensor: loading tensor blk.17.exp_probs_b.bias create_tensor: loading tensor blk.17.ffn_down_exps.weight create_tensor: loading tensor blk.17.ffn_up_exps.weight create_tensor: loading tensor blk.17.ffn_down_shexp.weight create_tensor: loading tensor blk.17.ffn_up_shexp.weight create_tensor: loading tensor blk.18.attn_norm.weight create_tensor: loading tensor blk.18.ssm_in.weight create_tensor: loading tensor blk.18.ssm_conv1d.weight create_tensor: loading tensor blk.18.ssm_conv1d.bias create_tensor: loading tensor blk.18.ssm_dt.bias create_tensor: loading tensor blk.18.ssm_a create_tensor: loading tensor blk.18.ssm_d create_tensor: loading tensor blk.18.ssm_norm.weight create_tensor: loading tensor blk.18.ssm_out.weight create_tensor: loading tensor blk.19.attn_norm.weight create_tensor: loading tensor blk.19.attn_q.weight create_tensor: loading tensor blk.19.attn_k.weight create_tensor: loading tensor blk.19.attn_v.weight create_tensor: loading tensor blk.19.attn_output.weight create_tensor: loading tensor blk.20.attn_norm.weight create_tensor: loading tensor blk.20.ffn_gate_inp.weight create_tensor: loading tensor blk.20.exp_probs_b.bias create_tensor: loading tensor blk.20.ffn_down_exps.weight create_tensor: loading tensor blk.20.ffn_up_exps.weight create_tensor: loading tensor blk.20.ffn_down_shexp.weight create_tensor: loading tensor blk.20.ffn_up_shexp.weight create_tensor: loading tensor blk.21.attn_norm.weight create_tensor: loading tensor blk.21.ssm_in.weight create_tensor: loading tensor blk.21.ssm_conv1d.weight create_tensor: loading tensor blk.21.ssm_conv1d.bias create_tensor: loading tensor blk.21.ssm_dt.bias create_tensor: loading tensor blk.21.ssm_a create_tensor: loading tensor blk.21.ssm_d create_tensor: loading tensor blk.21.ssm_norm.weight create_tensor: loading tensor blk.21.ssm_out.weight create_tensor: loading tensor blk.22.attn_norm.weight create_tensor: loading tensor blk.22.ffn_gate_inp.weight create_tensor: loading tensor blk.22.exp_probs_b.bias create_tensor: loading tensor blk.22.ffn_down_exps.weight create_tensor: loading tensor blk.22.ffn_up_exps.weight create_tensor: loading tensor blk.22.ffn_down_shexp.weight create_tensor: loading tensor blk.22.ffn_up_shexp.weight create_tensor: loading tensor blk.23.attn_norm.weight create_tensor: loading tensor blk.23.ssm_in.weight create_tensor: loading tensor blk.23.ssm_conv1d.weight create_tensor: loading tensor blk.23.ssm_conv1d.bias create_tensor: loading tensor blk.23.ssm_dt.bias create_tensor: loading tensor blk.23.ssm_a create_tensor: loading tensor blk.23.ssm_d create_tensor: loading tensor blk.23.ssm_norm.weight create_tensor: loading tensor blk.23.ssm_out.weight create_tensor: loading tensor blk.24.attn_norm.weight create_tensor: loading tensor blk.24.ffn_gate_inp.weight create_tensor: loading tensor blk.24.exp_probs_b.bias create_tensor: loading tensor blk.24.ffn_down_exps.weight create_tensor: loading tensor blk.24.ffn_up_exps.weight create_tensor: loading tensor blk.24.ffn_down_shexp.weight create_tensor: loading tensor blk.24.ffn_up_shexp.weight create_tensor: loading tensor blk.25.attn_norm.weight create_tensor: loading tensor blk.25.ssm_in.weight create_tensor: loading tensor blk.25.ssm_conv1d.weight create_tensor: loading tensor blk.25.ssm_conv1d.bias create_tensor: loading tensor blk.25.ssm_dt.bias create_tensor: loading tensor blk.25.ssm_a create_tensor: loading tensor blk.25.ssm_d create_tensor: loading tensor blk.25.ssm_norm.weight create_tensor: loading tensor blk.25.ssm_out.weight create_tensor: loading tensor blk.26.attn_norm.weight create_tensor: loading tensor blk.26.attn_q.weight create_tensor: loading tensor blk.26.attn_k.weight create_tensor: loading tensor blk.26.attn_v.weight create_tensor: loading tensor blk.26.attn_output.weight create_tensor: loading tensor blk.27.attn_norm.weight create_tensor: loading tensor blk.27.ffn_gate_inp.weight create_tensor: loading tensor blk.27.exp_probs_b.bias create_tensor: loading tensor blk.27.ffn_down_exps.weight create_tensor: loading tensor blk.27.ffn_up_exps.weight create_tensor: loading tensor blk.27.ffn_down_shexp.weight create_tensor: loading tensor blk.27.ffn_up_shexp.weight create_tensor: loading tensor blk.28.attn_norm.weight create_tensor: loading tensor blk.28.ssm_in.weight create_tensor: loading tensor blk.28.ssm_conv1d.weight create_tensor: loading tensor blk.28.ssm_conv1d.bias create_tensor: loading tensor blk.28.ssm_dt.bias create_tensor: loading tensor blk.28.ssm_a create_tensor: loading tensor blk.28.ssm_d create_tensor: loading tensor blk.28.ssm_norm.weight create_tensor: loading tensor blk.28.ssm_out.weight create_tensor: loading tensor blk.29.attn_norm.weight create_tensor: loading tensor blk.29.ffn_gate_inp.weight create_tensor: loading tensor blk.29.exp_probs_b.bias create_tensor: loading tensor blk.29.ffn_down_exps.weight create_tensor: loading tensor blk.29.ffn_up_exps.weight create_tensor: loading tensor blk.29.ffn_down_shexp.weight create_tensor: loading tensor blk.29.ffn_up_shexp.weight create_tensor: loading tensor blk.30.attn_norm.weight create_tensor: loading tensor blk.30.ssm_in.weight create_tensor: loading tensor blk.30.ssm_conv1d.weight create_tensor: loading tensor blk.30.ssm_conv1d.bias create_tensor: loading tensor blk.30.ssm_dt.bias create_tensor: loading tensor blk.30.ssm_a create_tensor: loading tensor blk.30.ssm_d create_tensor: loading tensor blk.30.ssm_norm.weight create_tensor: loading tensor blk.30.ssm_out.weight create_tensor: loading tensor blk.31.attn_norm.weight create_tensor: loading tensor blk.31.ffn_gate_inp.weight create_tensor: loading tensor blk.31.exp_probs_b.bias create_tensor: loading tensor blk.31.ffn_down_exps.weight create_tensor: loading tensor blk.31.ffn_up_exps.weight create_tensor: loading tensor blk.31.ffn_down_shexp.weight create_tensor: loading tensor blk.31.ffn_up_shexp.weight create_tensor: loading tensor blk.32.attn_norm.weight create_tensor: loading tensor blk.32.ssm_in.weight create_tensor: loading tensor blk.32.ssm_conv1d.weight create_tensor: loading tensor blk.32.ssm_conv1d.bias create_tensor: loading tensor blk.32.ssm_dt.bias create_tensor: loading tensor blk.32.ssm_a create_tensor: loading tensor blk.32.ssm_d create_tensor: loading tensor blk.32.ssm_norm.weight create_tensor: loading tensor blk.32.ssm_out.weight create_tensor: loading tensor blk.33.attn_norm.weight create_tensor: loading tensor blk.33.attn_q.weight create_tensor: loading tensor blk.33.attn_k.weight create_tensor: loading tensor blk.33.attn_v.weight create_tensor: loading tensor blk.33.attn_output.weight create_tensor: loading tensor blk.34.attn_norm.weight create_tensor: loading tensor blk.34.ffn_gate_inp.weight create_tensor: loading tensor blk.34.exp_probs_b.bias create_tensor: loading tensor blk.34.ffn_down_exps.weight create_tensor: loading tensor blk.34.ffn_up_exps.weight create_tensor: loading tensor blk.34.ffn_down_shexp.weight create_tensor: loading tensor blk.34.ffn_up_shexp.weight create_tensor: loading tensor blk.35.attn_norm.weight create_tensor: loading tensor blk.35.ssm_in.weight create_tensor: loading tensor blk.35.ssm_conv1d.weight create_tensor: loading tensor blk.35.ssm_conv1d.bias create_tensor: loading tensor blk.35.ssm_dt.bias create_tensor: loading tensor blk.35.ssm_a create_tensor: loading tensor blk.35.ssm_d create_tensor: loading tensor blk.35.ssm_norm.weight create_tensor: loading tensor blk.35.ssm_out.weight create_tensor: loading tensor blk.36.attn_norm.weight create_tensor: loading tensor blk.36.ffn_gate_inp.weight create_tensor: loading tensor blk.36.exp_probs_b.bias create_tensor: loading tensor blk.36.ffn_down_exps.weight create_tensor: loading tensor blk.36.ffn_up_exps.weight create_tensor: loading tensor blk.36.ffn_down_shexp.weight create_tensor: loading tensor blk.36.ffn_up_shexp.weight create_tensor: loading tensor blk.37.attn_norm.weight create_tensor: loading tensor blk.37.ssm_in.weight create_tensor: loading tensor blk.37.ssm_conv1d.weight create_tensor: loading tensor blk.37.ssm_conv1d.bias create_tensor: loading tensor blk.37.ssm_dt.bias create_tensor: loading tensor blk.37.ssm_a create_tensor: loading tensor blk.37.ssm_d create_tensor: loading tensor blk.37.ssm_norm.weight create_tensor: loading tensor blk.37.ssm_out.weight create_tensor: loading tensor blk.38.attn_norm.weight create_tensor: loading tensor blk.38.ffn_gate_inp.weight create_tensor: loading tensor blk.38.exp_probs_b.bias create_tensor: loading tensor blk.38.ffn_down_exps.weight create_tensor: loading tensor blk.38.ffn_up_exps.weight create_tensor: loading tensor blk.38.ffn_down_shexp.weight create_tensor: loading tensor blk.38.ffn_up_shexp.weight create_tensor: loading tensor blk.39.attn_norm.weight create_tensor: loading tensor blk.39.ssm_in.weight create_tensor: loading tensor blk.39.ssm_conv1d.weight create_tensor: loading tensor blk.39.ssm_conv1d.bias create_tensor: loading tensor blk.39.ssm_dt.bias create_tensor: loading tensor blk.39.ssm_a create_tensor: loading tensor blk.39.ssm_d create_tensor: loading tensor blk.39.ssm_norm.weight create_tensor: loading tensor blk.39.ssm_out.weight create_tensor: loading tensor blk.40.attn_norm.weight create_tensor: loading tensor blk.40.ffn_gate_inp.weight create_tensor: loading tensor blk.40.exp_probs_b.bias create_tensor: loading tensor blk.40.ffn_down_exps.weight create_tensor: loading tensor blk.40.ffn_up_exps.weight create_tensor: loading tensor blk.40.ffn_down_shexp.weight create_tensor: loading tensor blk.40.ffn_up_shexp.weight create_tensor: loading tensor blk.41.attn_norm.weight create_tensor: loading tensor blk.41.ssm_in.weight create_tensor: loading tensor blk.41.ssm_conv1d.weight create_tensor: loading tensor blk.41.ssm_conv1d.bias create_tensor: loading tensor blk.41.ssm_dt.bias create_tensor: loading tensor blk.41.ssm_a create_tensor: loading tensor blk.41.ssm_d create_tensor: loading tensor blk.41.ssm_norm.weight create_tensor: loading tensor blk.41.ssm_out.weight create_tensor: loading tensor blk.42.attn_norm.weight create_tensor: loading tensor blk.42.attn_q.weight create_tensor: loading tensor blk.42.attn_k.weight create_tensor: loading tensor blk.42.attn_v.weight create_tensor: loading tensor blk.42.attn_output.weight create_tensor: loading tensor blk.43.attn_norm.weight create_tensor: loading tensor blk.43.ffn_gate_inp.weight create_tensor: loading tensor blk.43.exp_probs_b.bias create_tensor: loading tensor blk.43.ffn_down_exps.weight create_tensor: loading tensor blk.43.ffn_up_exps.weight create_tensor: loading tensor blk.43.ffn_down_shexp.weight create_tensor: loading tensor blk.43.ffn_up_shexp.weight create_tensor: loading tensor blk.44.attn_norm.weight create_tensor: loading tensor blk.44.ssm_in.weight create_tensor: loading tensor blk.44.ssm_conv1d.weight create_tensor: loading tensor blk.44.ssm_conv1d.bias create_tensor: loading tensor blk.44.ssm_dt.bias create_tensor: loading tensor blk.44.ssm_a create_tensor: loading tensor blk.44.ssm_d create_tensor: loading tensor blk.44.ssm_norm.weight create_tensor: loading tensor blk.44.ssm_out.weight create_tensor: loading tensor blk.45.attn_norm.weight create_tensor: loading tensor blk.45.ffn_gate_inp.weight create_tensor: loading tensor blk.45.exp_probs_b.bias create_tensor: loading tensor blk.45.ffn_down_exps.weight create_tensor: loading tensor blk.45.ffn_up_exps.weight create_tensor: loading tensor blk.45.ffn_down_shexp.weight create_tensor: loading tensor blk.45.ffn_up_shexp.weight create_tensor: loading tensor blk.46.attn_norm.weight create_tensor: loading tensor blk.46.ssm_in.weight create_tensor: loading tensor blk.46.ssm_conv1d.weight create_tensor: loading tensor blk.46.ssm_conv1d.bias create_tensor: loading tensor blk.46.ssm_dt.bias create_tensor: loading tensor blk.46.ssm_a create_tensor: loading tensor blk.46.ssm_d create_tensor: loading tensor blk.46.ssm_norm.weight create_tensor: loading tensor blk.46.ssm_out.weight create_tensor: loading tensor blk.47.attn_norm.weight create_tensor: loading tensor blk.47.ffn_gate_inp.weight create_tensor: loading tensor blk.47.exp_probs_b.bias create_tensor: loading tensor blk.47.ffn_down_exps.weight create_tensor: loading tensor blk.47.ffn_up_exps.weight create_tensor: loading tensor blk.47.ffn_down_shexp.weight create_tensor: loading tensor blk.47.ffn_up_shexp.weight create_tensor: loading tensor blk.48.attn_norm.weight create_tensor: loading tensor blk.48.ssm_in.weight create_tensor: loading tensor blk.48.ssm_conv1d.weight create_tensor: loading tensor blk.48.ssm_conv1d.bias create_tensor: loading tensor blk.48.ssm_dt.bias create_tensor: loading tensor blk.48.ssm_a create_tensor: loading tensor blk.48.ssm_d create_tensor: loading tensor blk.48.ssm_norm.weight create_tensor: loading tensor blk.48.ssm_out.weight create_tensor: loading tensor blk.49.attn_norm.weight create_tensor: loading tensor blk.49.ffn_gate_inp.weight create_tensor: loading tensor blk.49.exp_probs_b.bias create_tensor: loading tensor blk.49.ffn_down_exps.weight create_tensor: loading tensor blk.49.ffn_up_exps.weight create_tensor: loading tensor blk.49.ffn_down_shexp.weight create_tensor: loading tensor blk.49.ffn_up_shexp.weight create_tensor: loading tensor blk.50.attn_norm.weight create_tensor: loading tensor blk.50.ssm_in.weight create_tensor: loading tensor blk.50.ssm_conv1d.weight create_tensor: loading tensor blk.50.ssm_conv1d.bias create_tensor: loading tensor blk.50.ssm_dt.bias create_tensor: loading tensor blk.50.ssm_a create_tensor: loading tensor blk.50.ssm_d create_tensor: loading tensor blk.50.ssm_norm.weight create_tensor: loading tensor blk.50.ssm_out.weight create_tensor: loading tensor blk.51.attn_norm.weight create_tensor: loading tensor blk.51.ffn_gate_inp.weight create_tensor: loading tensor blk.51.exp_probs_b.bias create_tensor: loading tensor blk.51.ffn_down_exps.weight create_tensor: loading tensor blk.51.ffn_up_exps.weight create_tensor: loading tensor blk.51.ffn_down_shexp.weight create_tensor: loading tensor blk.51.ffn_up_shexp.weight load_tensors: tensor 'token_embd.weight' (q5_0) (and 0 others) cannot be used with preferred buffer type CUDA_Host, using CPU instead load_tensors: offloading output layer to GPU load_tensors: offloading 51 repeating layers to GPU load_tensors: offloaded 53/53 layers to GPU load_tensors: CPU_Mapped model buffer size = 231.00 MiB load_tensors: CUDA0 model buffer size = 23140.97 MiB .................................................... llama_context: constructing llama_context llama_context: n_seq_max = 1 llama_context: n_ctx = 49152 llama_context: n_ctx_seq = 49152 llama_context: n_batch = 512 llama_context: n_ubatch = 512 llama_context: causal_attn = 1 llama_context: flash_attn = auto llama_context: kv_unified = false llama_context: freq_base = 10000.0 llama_context: freq_scale = 1 llama_context: n_ctx_seq (49152) < n_ctx_train (1048576) -- the full capacity of the model will not be utilized set_abort_callback: call llama_context: CUDA_Host output buffer size = 0.50 MiB llama_kv_cache: layer 0: filtered llama_kv_cache: layer 1: filtered llama_kv_cache: layer 2: filtered llama_kv_cache: layer 3: filtered llama_kv_cache: layer 4: filtered llama_kv_cache: layer 5: dev = CUDA0 llama_kv_cache: layer 6: filtered llama_kv_cache: layer 7: filtered llama_kv_cache: layer 8: filtered llama_kv_cache: layer 9: filtered llama_kv_cache: layer 10: filtered llama_kv_cache: layer 11: filtered llama_kv_cache: layer 12: dev = CUDA0 llama_kv_cache: layer 13: filtered llama_kv_cache: layer 14: filtered llama_kv_cache: layer 15: filtered llama_kv_cache: layer 16: filtered llama_kv_cache: layer 17: filtered llama_kv_cache: layer 18: filtered llama_kv_cache: layer 19: dev = CUDA0 llama_kv_cache: layer 20: filtered llama_kv_cache: layer 21: filtered llama_kv_cache: layer 22: filtered llama_kv_cache: layer 23: filtered llama_kv_cache: layer 24: filtered llama_kv_cache: layer 25: filtered llama_kv_cache: layer 26: dev = CUDA0 llama_kv_cache: layer 27: filtered llama_kv_cache: layer 28: filtered llama_kv_cache: layer 29: filtered llama_kv_cache: layer 30: filtered llama_kv_cache: layer 31: filtered llama_kv_cache: layer 32: filtered llama_kv_cache: layer 33: dev = CUDA0 llama_kv_cache: layer 34: filtered llama_kv_cache: layer 35: filtered llama_kv_cache: layer 36: filtered llama_kv_cache: layer 37: filtered llama_kv_cache: layer 38: filtered llama_kv_cache: layer 39: filtered llama_kv_cache: layer 40: filtered llama_kv_cache: layer 41: filtered llama_kv_cache: layer 42: dev = CUDA0 llama_kv_cache: layer 43: filtered llama_kv_cache: layer 44: filtered llama_kv_cache: layer 45: filtered llama_kv_cache: layer 46: filtered llama_kv_cache: layer 47: filtered llama_kv_cache: layer 48: filtered llama_kv_cache: layer 49: filtered llama_kv_cache: layer 50: filtered llama_kv_cache: layer 51: filtered llama_kv_cache: CUDA0 KV buffer size = 288.00 MiB llama_kv_cache: size = 288.00 MiB ( 49152 cells, 6 layers, 1/1 seqs), K (f16): 144.00 MiB, V (f16): 144.00 MiB llama_memory_recurrent, layer 0: dev = CUDA0 llama_memory_recurrent: layer 1: skipped llama_memory_recurrent, layer 2: dev = CUDA0 llama_memory_recurrent: layer 3: skipped llama_memory_recurrent, layer 4: dev = CUDA0 llama_memory_recurrent: layer 5: skipped llama_memory_recurrent: layer 6: skipped llama_memory_recurrent, layer 7: dev = CUDA0 llama_memory_recurrent: layer 8: skipped llama_memory_recurrent, layer 9: dev = CUDA0 llama_memory_recurrent: layer 10: skipped llama_memory_recurrent, layer 11: dev = CUDA0 llama_memory_recurrent: layer 12: skipped llama_memory_recurrent: layer 13: skipped llama_memory_recurrent, layer 14: dev = CUDA0 llama_memory_recurrent: layer 15: skipped llama_memory_recurrent, layer 16: dev = CUDA0 llama_memory_recurrent: layer 17: skipped llama_memory_recurrent, layer 18: dev = CUDA0 llama_memory_recurrent: layer 19: skipped llama_memory_recurrent: layer 20: skipped llama_memory_recurrent, layer 21: dev = CUDA0 llama_memory_recurrent: layer 22: skipped llama_memory_recurrent, layer 23: dev = CUDA0 llama_memory_recurrent: layer 24: skipped llama_memory_recurrent, layer 25: dev = CUDA0 llama_memory_recurrent: layer 26: skipped llama_memory_recurrent: layer 27: skipped llama_memory_recurrent, layer 28: dev = CUDA0 llama_memory_recurrent: layer 29: skipped llama_memory_recurrent, layer 30: dev = CUDA0 llama_memory_recurrent: layer 31: skipped llama_memory_recurrent, layer 32: dev = CUDA0 llama_memory_recurrent: layer 33: skipped llama_memory_recurrent: layer 34: skipped llama_memory_recurrent, layer 35: dev = CUDA0 llama_memory_recurrent: layer 36: skipped llama_memory_recurrent, layer 37: dev = CUDA0 llama_memory_recurrent: layer 38: skipped llama_memory_recurrent, layer 39: dev = CUDA0 llama_memory_recurrent: layer 40: skipped llama_memory_recurrent, layer 41: dev = CUDA0 llama_memory_recurrent: layer 42: skipped llama_memory_recurrent: layer 43: skipped llama_memory_recurrent, layer 44: dev = CUDA0 llama_memory_recurrent: layer 45: skipped llama_memory_recurrent, layer 46: dev = CUDA0 llama_memory_recurrent: layer 47: skipped llama_memory_recurrent, layer 48: dev = CUDA0 llama_memory_recurrent: layer 49: skipped llama_memory_recurrent, layer 50: dev = CUDA0 llama_memory_recurrent: layer 51: skipped llama_memory_recurrent: CUDA0 RS buffer size = 47.62 MiB llama_memory_recurrent: size = 47.62 MiB ( 1 cells, 52 layers, 1 seqs), R (f32): 1.62 MiB, S (f32): 46.00 MiB llama_context: enumerating backends llama_context: backend_ptrs.size() = 2 llama_context: max_nodes = 3208 llama_context: reserving full memory module llama_context: worst-case: n_tokens = 512, n_seqs = 1, n_outputs = 1 graph_reserve: reserving a graph for ubatch with n_tokens = 1, n_seqs = 1, n_outputs = 1 llama_context: Flash Attention was auto, set to enabled graph_reserve: reserving a graph for ubatch with n_tokens = 512, n_seqs = 1, n_outputs = 512 graph_reserve: reserving a graph for ubatch with n_tokens = 1, n_seqs = 1, n_outputs = 1 graph_reserve: reserving a graph for ubatch with n_tokens = 512, n_seqs = 1, n_outputs = 512 llama_context: CUDA0 compute buffer size = 271.82 MiB llama_context: CUDA_Host compute buffer size = 101.26 MiB llama_context: graph nodes = 2188 llama_context: graph splits = 2 CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 | Model metadata: {'general.file_type': '15', 'general.quantization_version': '2', 'tokenizer.ggml.add_eos_token': 'false', 'tokenizer.ggml.unknown_token_id': '0', 'tokenizer.ggml.eos_token_id': '11', 'tokenizer.ggml.model': 'gpt2', 'nemotron_h_moe.expert_weights_scale': '2.500000', 'nemotron_h_moe.expert_shared_count': '1', 'nemotron_h_moe.rope.freq_base': '10000.000000', 'nemotron_h_moe.attention.head_count': '32', 'nemotron_h_moe.context_length': '1048576', 'nemotron_h_moe.attention.layer_norm_epsilon': '0.000010', 'nemotron_h_moe.ssm.conv_kernel': '4', 'general.type': 'model', 'nemotron_h_moe.vocab_size': '131072', 'tokenizer.ggml.bos_token_id': '1', 'general.name': 'Nano v3 Llm_Vv1.0.1 Ga', 'tokenizer.ggml.add_bos_token': 'false', 'general.size_label': '128x2.4B', 'nemotron_h_moe.block_count': '52', 'general.sampling.temp': '1.000000', 'nemotron_h_moe.ssm.inner_size': '4096', 'general.sampling.top_p': '1.000000', 'nemotron_h_moe.attention.layer_norm_rms_epsilon': '0.000010', 'general.architecture': 'nemotron_h_moe', 'nemotron_h_moe.expert_count': '128', 'tokenizer.ggml.padding_token_id': '0', 'nemotron_h_moe.expert_used_count': '6', 'nemotron_h_moe.ssm.time_step_rank': '64', 'nemotron_h_moe.expert_group_count': '1', 'nemotron_h_moe.ssm.state_size': '128', 'nemotron_h_moe.expert_group_used_count': '1', 'nemotron_h_moe.rope.dimension_count': '84', 'nemotron_h_moe.attention.value_length': '128', 'tokenizer.chat_template': '{% macro render_extra_keys(json_dict, handled_keys) %}\n {%- if json_dict is mapping %}\n {%- for json_key in json_dict if json_key not in handled_keys %}\n {%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}\n {{- \'\\n<\' ~ json_key ~ \'>\' ~ (json_dict[json_key] | tojson | safe) ~ \'\' }}\n {%- else %}\n {{-\'\\n<\' ~ json_key ~ \'>\' ~ (json_dict[json_key] | string) ~ \'\' }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n{% endmacro %}\n{%- set enable_thinking = enable_thinking if enable_thinking is defined else True %}\n{%- set truncate_history_thinking = truncate_history_thinking if truncate_history_thinking is defined else True %}\n\n{%- set ns = namespace(last_user_idx = -1) %}\n{%- set loop_messages = messages %}\n{%- for m in loop_messages %}\n {%- if m["role"] == "user" %}\n {%- set ns.last_user_idx = loop.index0 %}\n {%- endif %}\n{%- endfor %}\n\n{%- if messages[0]["role"] == "system" %}\n {%- set system_message = messages[0]["content"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set system_message = "" %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = [] %}\n{%- endif %}\n{# Recompute last_user_idx relative to loop_messages after handling system #}\n{%- set ns = namespace(last_user_idx = -1) %}\n{%- for m in loop_messages %}\n {%- if m["role"] == "user" %}\n {%- set ns.last_user_idx = loop.index0 %}\n {%- endif %}\n{%- endfor %}\n{%- if system_message is defined %}\n {{- "<|im_start|>system\\n" + system_message }}\n{%- else %}\n {%- if tools is iterable and tools | length > 0 %}\n {{- "<|im_start|>system\\n" }}\n {%- endif %}\n{%- endif %}\n{%- if tools is iterable and tools | length > 0 %}\n {%- if system_message is defined and system_message | length > 0 %}\n {{- "\\n\\n" }}\n {%- endif %}\n {{- "# Tools\\n\\nYou have access to the following functions:\\n\\n" }}\n {{- "" }}\n {%- for tool in tools %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {{- "\\n\\n" ~ tool.name ~ "" }}\n {%- if tool.description is defined %}\n {{- \'\\n\' ~ (tool.description | trim) ~ \'\' }}\n {%- endif %}\n {{- \'\\n\' }}\n {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {{- \'\\n\' }}\n {{- \'\\n\' ~ param_name ~ \'\' }}\n {%- if param_fields.type is defined %}\n {{- \'\\n\' ~ (param_fields.type | string) ~ \'\' }}\n {%- endif %}\n {%- if param_fields.description is defined %}\n {{- \'\\n\' ~ (param_fields.description | trim) ~ \'\' }}\n {%- endif %}\n {%- if param_fields.enum is defined %}\n {{- \'\\n\' ~ (param_fields.enum | tojson | safe) ~ \'\' }}\n {%- endif %}\n {%- set handled_keys = [\'name\', \'type\', \'description\', \'enum\'] %}\n {{- render_extra_keys(param_fields, handled_keys) }}\n {{- \'\\n\' }}\n {%- endfor %}\n {%- endif %}\n {% set handled_keys = [\'type\', \'properties\', \'required\'] %}\n {{- render_extra_keys(tool.parameters, handled_keys) }}\n {%- if tool.parameters is defined and tool.parameters.required is defined %}\n {{- \'\\n\' ~ (tool.parameters.required | tojson | safe) ~ \'\' }}\n {%- endif %}\n {{- \'\\n\' }}\n {%- set handled_keys = [\'type\', \'name\', \'description\', \'parameters\'] %}\n {{- render_extra_keys(tool, handled_keys) }}\n {{- \'\\n\' }}\n {%- endfor %}\n {{- "\\n" }}\n\n {{- \'\\n\\nIf you choose to call a function ONLY reply in the following format with NO suffix:\\n\\n\\n\\n\\nvalue_1\\n\\n\\nThis is the value for the second parameter\\nthat can span\\nmultiple lines\\n\\n\\n\\n\\n\\nReminder:\\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\\n- Required parameters MUST be specified\\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\\n\' }}\n{%- endif %}\n\n\n{%- if system_message is defined %}\n {{- \'<|im_end|>\\n\' }}\n{%- else %}\n {%- if tools is iterable and tools | length > 0 %}\n {{- \'<|im_end|>\\n\' }}\n {%- endif %}\n{%- endif %}\n\n{%- for message in loop_messages %}\n {%- if message.role == "assistant" %}\n {# Add reasoning content in to content field for unified processing below. #}\n {%- if message.reasoning_content is defined and message.reasoning_content is string and message.reasoning_content | trim | length > 0 %}\n {%- set content = "\\n" ~ message.reasoning_content ~ "\\n\\n" ~ (message.content | default(\'\', true)) %}\n {%- else %}\n {%- set content = message.content | default(\'\', true) %}\n {%- if content is string -%}\n {# Allow downstream logic to to take care of broken thought, only handle coherent reasoning here. #}\n {%- if \'\' not in content and \'\' not in content -%}\n {%- set content = "" ~ content -%}\n {%- endif -%}\n {%- else -%}\n {%- set content = content -%}\n {%- endif -%}\n {%- endif %}\n {%- if message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}\n {# Assistant message has tool calls. #}\n {{- \'<|im_start|>assistant\\n\' }}\n {%- set include_content = not (truncate_history_thinking and loop.index0 < ns.last_user_idx) %}\n {%- if content is string and content | trim | length > 0 %}\n {%- if include_content %}\n {{- (content | trim) ~ \'\\n\' -}}\n {%- else %}\n {%- set c = (content | string) %}\n {%- if \'\' in c %}\n {# Keep only content after the last closing think. Also generation prompt causes this. #}\n {%- set c = c.split(\'\')[-1] %}\n {%- elif \'\' in c %}\n {# If was opened but never closed, drop the trailing think segment #}\n {%- set c = c.split(\'\')[0] %}\n {%- endif %}\n {%- set c = "" ~ c | trim %}\n {%- if c | length > 0 %}\n {{- c ~ \'\\n\' -}}\n {%- endif %}\n {%- endif %}\n {%- else %}\n {{- "" -}}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- \'\\n\\n\' -}}\n {%- if tool_call.arguments is defined %}\n {%- for args_name, args_value in tool_call.arguments|items %}\n {{- \'\\n\' -}}\n {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}\n {{- args_value ~ \'\\n\\n\' -}}\n {%- endfor %}\n {%- endif %}\n {{- \'\\n\\n\' -}}\n {%- endfor %}\n {{- \'<|im_end|>\\n\' }}\n {%- else %}\n {# Assistant message doesn\'t have tool calls. #}\n {%- if not (truncate_history_thinking and loop.index0 < ns.last_user_idx) %}\n {{- \'<|im_start|>assistant\\n\' ~ (content | default(\'\', true) | string | trim) ~ \'<|im_end|>\\n\' }}\n {%- else %}\n {%- set c = (content | default(\'\', true) | string) %}\n {%- if \'\' in c and \'\' in c %}\n {%- set c = "" ~ c.split(\'\')[-1] %}\n {%- endif %}\n {%- set c = c | trim %}\n {%- if c | length > 0 %}\n {{- \'<|im_start|>assistant\\n\' ~ c ~ \'<|im_end|>\\n\' }}\n {%- else %}\n {{- \'<|im_start|>assistant\\n<|im_end|>\\n\' }}\n {%- endif %}\n {%- endif %}\n {%- endif %}\n {%- elif message.role == "user" or message.role == "system" %}\n {{- \'<|im_start|>\' + message.role + \'\\n\' }}\n {%- set content = message.content | string %}\n {{- content }}\n {{- \'<|im_end|>\\n\' }}\n {%- elif message.role == "tool" %}\n {%- if loop.previtem and loop.previtem.role != "tool" %}\n {{- \'<|im_start|>user\\n\' }}\n {%- endif %}\n {{- \'\\n\' }}\n {{- message.content }}\n {{- \'\\n\\n\' }}\n {%- if not loop.last and loop.nextitem.role != "tool" %}\n {{- \'<|im_end|>\\n\' }}\n {%- elif loop.last %}\n {{- \'<|im_end|>\\n\' }}\n {%- endif %}\n {%- else %}\n {{- \'<|im_start|>\' + message.role + \'\\n\' + message.content + \'<|im_end|>\\n\' }}\n {%- endif %}\n{%- endfor %}\n\n{%- if add_generation_prompt %}\n {%- if enable_thinking %}\n {{- \'<|im_start|>assistant\\n\\n\' }}\n {%- else %}\n {{- \'<|im_start|>assistant\\n\' }}\n {%- endif %}\n{%- endif %}\n', 'nemotron_h_moe.ssm.group_count': '8', 'nemotron_h_moe.rope.scaling.finetuned': 'false', 'nemotron_h_moe.attention.key_length': '128', 'tokenizer.ggml.pre': 'pixtral', 'nemotron_h_moe.expert_feed_forward_length': '1856', 'nemotron_h_moe.expert_weights_norm': 'true', 'nemotron_h_moe.embedding_length': '2688', 'nemotron_h_moe.expert_shared_feed_forward_length': '3712'} Available chat formats from metadata: chat_template.default Using gguf chat template: {% macro render_extra_keys(json_dict, handled_keys) %} {%- if json_dict is mapping %} {%- for json_key in json_dict if json_key not in handled_keys %} {%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %} {{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '' }} {%- else %} {{-'\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '' }} {%- endif %} {%- endfor %} {%- endif %} {% endmacro %} {%- set enable_thinking = enable_thinking if enable_thinking is defined else True %} {%- set truncate_history_thinking = truncate_history_thinking if truncate_history_thinking is defined else True %} {%- set ns = namespace(last_user_idx = -1) %} {%- set loop_messages = messages %} {%- for m in loop_messages %} {%- if m["role"] == "user" %} {%- set ns.last_user_idx = loop.index0 %} {%- endif %} {%- endfor %} {%- if messages[0]["role"] == "system" %} {%- set system_message = messages[0]["content"] %} {%- set loop_messages = messages[1:] %} {%- else %} {%- set system_message = "" %} {%- set loop_messages = messages %} {%- endif %} {%- if not tools is defined %} {%- set tools = [] %} {%- endif %} {# Recompute last_user_idx relative to loop_messages after handling system #} {%- set ns = namespace(last_user_idx = -1) %} {%- for m in loop_messages %} {%- if m["role"] == "user" %} {%- set ns.last_user_idx = loop.index0 %} {%- endif %} {%- endfor %} {%- if system_message is defined %} {{- "<|im_start|>system\n" + system_message }} {%- else %} {%- if tools is iterable and tools | length > 0 %} {{- "<|im_start|>system\n" }} {%- endif %} {%- endif %} {%- if tools is iterable and tools | length > 0 %} {%- if system_message is defined and system_message | length > 0 %} {{- "\n\n" }} {%- endif %} {{- "# Tools\n\nYou have access to the following functions:\n\n" }} {{- "" }} {%- for tool in tools %} {%- if tool.function is defined %} {%- set tool = tool.function %} {%- endif %} {{- "\n\n" ~ tool.name ~ "" }} {%- if tool.description is defined %} {{- '\n' ~ (tool.description | trim) ~ '' }} {%- endif %} {{- '\n' }} {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %} {%- for param_name, param_fields in tool.parameters.properties|items %} {{- '\n' }} {{- '\n' ~ param_name ~ '' }} {%- if param_fields.type is defined %} {{- '\n' ~ (param_fields.type | string) ~ '' }} {%- endif %} {%- if param_fields.description is defined %} {{- '\n' ~ (param_fields.description | trim) ~ '' }} {%- endif %} {%- if param_fields.enum is defined %} {{- '\n' ~ (param_fields.enum | tojson | safe) ~ '' }} {%- endif %} {%- set handled_keys = ['name', 'type', 'description', 'enum'] %} {{- render_extra_keys(param_fields, handled_keys) }} {{- '\n' }} {%- endfor %} {%- endif %} {% set handled_keys = ['type', 'properties', 'required'] %} {{- render_extra_keys(tool.parameters, handled_keys) }} {%- if tool.parameters is defined and tool.parameters.required is defined %} {{- '\n' ~ (tool.parameters.required | tojson | safe) ~ '' }} {%- endif %} {{- '\n' }} {%- set handled_keys = ['type', 'name', 'description', 'parameters'] %} {{- render_extra_keys(tool, handled_keys) }} {{- '\n' }} {%- endfor %} {{- "\n" }} {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }} {%- endif %} {%- if system_message is defined %} {{- '<|im_end|>\n' }} {%- else %} {%- if tools is iterable and tools | length > 0 %} {{- '<|im_end|>\n' }} {%- endif %} {%- endif %} {%- for message in loop_messages %} {%- if message.role == "assistant" %} {# Add reasoning content in to content field for unified processing below. #} {%- if message.reasoning_content is defined and message.reasoning_content is string and message.reasoning_content | trim | length > 0 %} {%- set content = "\n" ~ message.reasoning_content ~ "\n\n" ~ (message.content | default('', true)) %} {%- else %} {%- set content = message.content | default('', true) %} {%- if content is string -%} {# Allow downstream logic to to take care of broken thought, only handle coherent reasoning here. #} {%- if '' not in content and '' not in content -%} {%- set content = "" ~ content -%} {%- endif -%} {%- else -%} {%- set content = content -%} {%- endif -%} {%- endif %} {%- if message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %} {# Assistant message has tool calls. #} {{- '<|im_start|>assistant\n' }} {%- set include_content = not (truncate_history_thinking and loop.index0 < ns.last_user_idx) %} {%- if content is string and content | trim | length > 0 %} {%- if include_content %} {{- (content | trim) ~ '\n' -}} {%- else %} {%- set c = (content | string) %} {%- if '' in c %} {# Keep only content after the last closing think. Also generation prompt causes this. #} {%- set c = c.split('')[-1] %} {%- elif '' in c %} {# If was opened but never closed, drop the trailing think segment #} {%- set c = c.split('')[0] %} {%- endif %} {%- set c = "" ~ c | trim %} {%- if c | length > 0 %} {{- c ~ '\n' -}} {%- endif %} {%- endif %} {%- else %} {{- "" -}} {%- endif %} {%- for tool_call in message.tool_calls %} {%- if tool_call.function is defined %} {%- set tool_call = tool_call.function %} {%- endif %} {{- '\n\n' -}} {%- if tool_call.arguments is defined %} {%- for args_name, args_value in tool_call.arguments|items %} {{- '\n' -}} {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %} {{- args_value ~ '\n\n' -}} {%- endfor %} {%- endif %} {{- '\n\n' -}} {%- endfor %} {{- '<|im_end|>\n' }} {%- else %} {# Assistant message doesn't have tool calls. #} {%- if not (truncate_history_thinking and loop.index0 < ns.last_user_idx) %} {{- '<|im_start|>assistant\n' ~ (content | default('', true) | string | trim) ~ '<|im_end|>\n' }} {%- else %} {%- set c = (content | default('', true) | string) %} {%- if '' in c and '' in c %} {%- set c = "" ~ c.split('')[-1] %} {%- endif %} {%- set c = c | trim %} {%- if c | length > 0 %} {{- '<|im_start|>assistant\n' ~ c ~ '<|im_end|>\n' }} {%- else %} {{- '<|im_start|>assistant\n<|im_end|>\n' }} {%- endif %} {%- endif %} {%- endif %} {%- elif message.role == "user" or message.role == "system" %} {{- '<|im_start|>' + message.role + '\n' }} {%- set content = message.content | string %} {{- content }} {{- '<|im_end|>\n' }} {%- elif message.role == "tool" %} {%- if loop.previtem and loop.previtem.role != "tool" %} {{- '<|im_start|>user\n' }} {%- endif %} {{- '\n' }} {{- message.content }} {{- '\n\n' }} {%- if not loop.last and loop.nextitem.role != "tool" %} {{- '<|im_end|>\n' }} {%- elif loop.last %} {{- '<|im_end|>\n' }} {%- endif %} {%- else %} {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' }} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {%- if enable_thinking %} {{- '<|im_start|>assistant\n\n' }} {%- else %} {{- '<|im_start|>assistant\n' }} {%- endif %} {%- endif %} Using chat eos_token: <|im_end|> Using chat bos_token: