|
11 | 11 |
|
12 | 12 | using json = nlohmann::json; |
13 | 13 |
|
14 | | -std::vector<std::vector<float>> fetch_embeddings(const std::vector<std::string>& input) { |
| 14 | +std::vector<std::vector<float>> embed(const std::vector<std::string>& texts, const std::string& taskType) { |
| 15 | + // nomic-embed-text-v1.5 uses a task prefix |
| 16 | + // https://huggingface.co/nomic-ai/nomic-embed-text-v1.5 |
| 17 | + std::vector<std::string> input; |
| 18 | + input.reserve(texts.size()); |
| 19 | + for (auto& v : texts) { |
| 20 | + input.push_back(taskType + ": " + v); |
| 21 | + } |
| 22 | + |
15 | 23 | std::string url = "http://localhost:8080/v1/embeddings"; |
16 | 24 | json data = { |
17 | 25 | {"input", input} |
@@ -48,7 +56,7 @@ int main() { |
48 | 56 | "The cat is purring", |
49 | 57 | "The bear is growling" |
50 | 58 | }; |
51 | | - auto embeddings = fetch_embeddings(input); |
| 59 | + auto embeddings = embed(input, "search_document"); |
52 | 60 |
|
53 | 61 | for (size_t i = 0; i < input.size(); i++) { |
54 | 62 | tx.exec("INSERT INTO documents (content, embedding) VALUES ($1, $2)", pqxx::params{input[i], pgvector::Vector(embeddings[i])}); |
@@ -78,7 +86,7 @@ int main() { |
78 | 86 | LIMIT 5 |
79 | 87 | )"; |
80 | 88 | std::string query = "growling bear"; |
81 | | - auto query_embedding = fetch_embeddings({query})[0]; |
| 89 | + auto query_embedding = embed({query}, "search_query")[0]; |
82 | 90 | double k = 60; |
83 | 91 | pqxx::result result = tx.exec(sql, pqxx::params{query, pgvector::Vector(query_embedding), k}); |
84 | 92 | for (const auto& row : result) { |
|
0 commit comments