forked from janhq/cortex.cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtokenizer.h
More file actions
72 lines (57 loc) · 1.83 KB
/
tokenizer.h
File metadata and controls
72 lines (57 loc) · 1.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#pragma once
#include <sstream>
#include <string>
/// Polymorphic base holding the tokenizer fields that every concrete
/// tokenizer description in this header shares.
struct Tokenizer {
  std::string eos_token = "";
  bool add_eos_token = true;

  std::string bos_token = "";
  bool add_bos_token = true;

  std::string unknown_token = "";
  std::string padding_token = "";

  std::string chat_template = "";
  bool add_generation_prompt = true;

  /// Renders the common fields as `name: value` lines.
  ///
  /// String fields are wrapped in double quotes, booleans are printed as
  /// `true` / `false`. Values are inserted verbatim (no escaping).
  ///
  /// @return The formatted text. Lines are separated by '\n' and the last
  ///         line has no trailing newline, so callers append their own
  ///         separator before adding further fields.
  std::string BaseToString() const {
    std::ostringstream ss;
    ss << "eos_token: \"" << eos_token << "\"\n"
       << "add_eos_token: " << (add_eos_token ? "true" : "false") << "\n"
       << "bos_token: \"" << bos_token << "\"\n"
       << "add_bos_token: " << (add_bos_token ? "true" : "false") << "\n"
       << "unknown_token: \"" << unknown_token << "\"\n"
       << "padding_token: \"" << padding_token << "\"\n"
       << "chat_template: \"" << chat_template << "\"\n"
       // Boolean value: emitted bare, with no closing quote after it.
       << "add_generation_prompt: "
       << (add_generation_prompt ? "true" : "false");
    return ss.str();
  }

  virtual ~Tokenizer() = default;

  /// Returns a textual dump of the tokenizer; implemented by each derived
  /// type.
  virtual std::string ToString() = 0;
};
/// Tokenizer description that extends the common fields with `pre`.
struct GgufTokenizer : public Tokenizer {
  std::string pre = "";

  ~GgufTokenizer() override = default;

  /// Builds a brace-delimited dump: the common fields first, then `pre`
  /// wrapped in double quotes.
  std::string ToString() override {
    std::string text = "GgufTokenizer {\n";

    // Fields inherited from the base.
    text += BaseToString();
    text += "\n";

    // Field specific to this type.
    text += "pre: \"";
    text += pre;
    text += "\"\n";

    text += "}";
    return text;
  }
};
struct SafeTensorTokenizer : public Tokenizer {
bool add_prefix_space = true;
~SafeTensorTokenizer() = default;
std::string ToString() override {
std::ostringstream ss;
ss << "SafeTensorTokenizer {\n";
// Add base class members
ss << BaseToString() << "\n";
// Add derived class members
ss << "add_prefix_space: " << (add_prefix_space ? "true" : "false") << "\n";
ss << "}";
return ss.str();
}
};