Skip to content

Commit 1774011

Browse files
committed
Improved examples [skip ci]
1 parent 780efcd commit 1774011

4 files changed

Lines changed: 22 additions & 16 deletions

File tree

examples/cohere/example.cpp

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
using json = nlohmann::json;
1111

1212
// https://docs.cohere.com/reference/embed
13-
std::vector<std::string> fetch_embeddings(const std::vector<std::string>& texts, const std::string& input_type, char *api_key) {
13+
std::vector<std::string> embed(const std::vector<std::string>& texts, const std::string& input_type, char *api_key) {
1414
std::string url = "https://api.cohere.com/v1/embed";
1515
json data = {
1616
{"texts", texts},
@@ -61,14 +61,13 @@ int main() {
6161
"The cat is purring",
6262
"The bear is growling"
6363
};
64-
auto embeddings = fetch_embeddings(input, "search_document", api_key);
65-
64+
auto embeddings = embed(input, "search_document", api_key);
6665
for (size_t i = 0; i < input.size(); i++) {
6766
tx.exec("INSERT INTO documents (content, embedding) VALUES ($1, $2)", pqxx::params{input[i], embeddings[i]});
6867
}
6968

7069
std::string query = "forest";
71-
auto query_embedding = fetch_embeddings({query}, "search_query", api_key)[0];
70+
auto query_embedding = embed({query}, "search_query", api_key)[0];
7271
pqxx::result result = tx.exec("SELECT content FROM documents ORDER BY embedding <~> $1 LIMIT 5", pqxx::params{query_embedding});
7372
for (const auto& row : result) {
7473
std::cout << row[0].as<std::string>() << std::endl;

examples/hybrid/example.cpp

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,15 @@
1111

1212
using json = nlohmann::json;
1313

14-
std::vector<std::vector<float>> fetch_embeddings(const std::vector<std::string>& input) {
14+
std::vector<std::vector<float>> embed(const std::vector<std::string>& texts, const std::string& taskType) {
15+
// nomic-embed-text-v1.5 uses a task prefix
16+
// https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
17+
std::vector<std::string> input;
18+
input.reserve(texts.size());
19+
for (auto& v : texts) {
20+
input.push_back(taskType + ": " + v);
21+
}
22+
1523
std::string url = "http://localhost:8080/v1/embeddings";
1624
json data = {
1725
{"input", input}
@@ -48,7 +56,7 @@ int main() {
4856
"The cat is purring",
4957
"The bear is growling"
5058
};
51-
auto embeddings = fetch_embeddings(input);
59+
auto embeddings = embed(input, "search_document");
5260

5361
for (size_t i = 0; i < input.size(); i++) {
5462
tx.exec("INSERT INTO documents (content, embedding) VALUES ($1, $2)", pqxx::params{input[i], pgvector::Vector(embeddings[i])});
@@ -78,7 +86,7 @@ int main() {
7886
LIMIT 5
7987
)";
8088
std::string query = "growling bear";
81-
auto query_embedding = fetch_embeddings({query})[0];
89+
auto query_embedding = embed({query}, "search_query")[0];
8290
double k = 60;
8391
pqxx::result result = tx.exec(sql, pqxx::params{query, pgvector::Vector(query_embedding), k});
8492
for (const auto& row : result) {

examples/openai/example.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ using json = nlohmann::json;
99

1010
// https://platform.openai.com/docs/guides/embeddings/how-to-get-embeddings
1111
// input can be an array with up to 2048 elements
12-
std::vector<std::vector<float>> fetch_embeddings(const std::vector<std::string>& input, char *api_key) {
12+
std::vector<std::vector<float>> embed(const std::vector<std::string>& input, char *api_key) {
1313
std::string url = "https://api.openai.com/v1/embeddings";
1414
json data = {
1515
{"input", input},
@@ -53,14 +53,14 @@ int main() {
5353
"The cat is purring",
5454
"The bear is growling"
5555
};
56-
auto embeddings = fetch_embeddings(input, api_key);
57-
56+
auto embeddings = embed(input, api_key);
5857
for (size_t i = 0; i < input.size(); i++) {
5958
tx.exec("INSERT INTO documents (content, embedding) VALUES ($1, $2)", pqxx::params{input[i], pgvector::Vector(embeddings[i])});
6059
}
6160

62-
int document_id = 1;
63-
pqxx::result result = tx.exec("SELECT content FROM documents WHERE id != $1 ORDER BY embedding <=> (SELECT embedding FROM documents WHERE id = $1) LIMIT 5", pqxx::params{document_id});
61+
std::string query = "forest";
62+
auto query_embedding = embed({query}, api_key)[0];
63+
pqxx::result result = tx.exec("SELECT content FROM documents ORDER BY embedding <=> $1 LIMIT 5", pqxx::params{pgvector::Vector(query_embedding)});
6464
for (const auto& row : result) {
6565
std::cout << row[0].as<std::string>() << std::endl;
6666
}

examples/sparse/example.cpp

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515

1616
using json = nlohmann::json;
1717

18-
std::vector<pgvector::SparseVector> fetch_embeddings(const std::vector<std::string>& inputs) {
18+
std::vector<pgvector::SparseVector> embed(const std::vector<std::string>& inputs) {
1919
std::string url = "http://localhost:3000/embed_sparse";
2020
json data = {
2121
{"inputs", inputs}
@@ -57,14 +57,13 @@ int main() {
5757
"The cat is purring",
5858
"The bear is growling"
5959
};
60-
auto embeddings = fetch_embeddings(input);
61-
60+
auto embeddings = embed(input);
6261
for (size_t i = 0; i < input.size(); i++) {
6362
tx.exec("INSERT INTO documents (content, embedding) VALUES ($1, $2)", pqxx::params{input[i], embeddings[i]});
6463
}
6564

6665
std::string query = "forest";
67-
auto query_embedding = fetch_embeddings({query})[0];
66+
auto query_embedding = embed({query})[0];
6867
pqxx::result result = tx.exec("SELECT content FROM documents ORDER BY embedding <#> $1 LIMIT 5", pqxx::params{query_embedding});
6968
for (const auto& row : result) {
7069
std::cout << row[0].as<std::string>() << std::endl;

0 commit comments

Comments
 (0)