Skip to content

Commit b49c0ec

Browse files
committed
Added Cohere example [skip ci]
1 parent 07dc521 commit b49c0ec

File tree

3 files changed

+95
-0
lines changed

3 files changed

+95
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ And follow the instructions for your database library:
2525
Or check out some examples:
2626

2727
- [Embeddings](examples/openai/example.cpp) with OpenAI
28+
- [Binary embeddings](examples/cohere/example.cpp) with Cohere
2829
- [Recommendations](examples/disco/example.cpp) with Disco
2930

3031
## libpqxx

examples/cohere/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Build script for the Cohere binary-embeddings example.
cmake_minimum_required(VERSION 3.18)

project(example)

# Build as C++17 and fail at configure time if the compiler cannot provide it,
# rather than silently falling back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Append instead of overwrite so flags supplied by the user (CXXFLAGS or
# -DCMAKE_CXX_FLAGS=...) are preserved. The variable is still used (not a
# per-target option) so the flag also reaches the fetched dependencies.
string(APPEND CMAKE_CXX_FLAGS " -Wno-unknown-attributes")

include(FetchContent)

# Third-party dependencies, pinned to release tags.
FetchContent_Declare(cpr GIT_REPOSITORY https://github.com/libcpr/cpr.git GIT_TAG 1.11.1)
FetchContent_Declare(json GIT_REPOSITORY https://github.com/nlohmann/json.git GIT_TAG v3.11.3)
FetchContent_Declare(libpqxx GIT_REPOSITORY https://github.com/jtv/libpqxx.git GIT_TAG 7.10.0)
FetchContent_MakeAvailable(cpr json libpqxx)

add_executable(example example.cpp)
# The include directory two levels above this example is added to the search path.
target_include_directories(example PRIVATE ${CMAKE_SOURCE_DIR}/../../include)
target_link_libraries(example PRIVATE cpr::cpr nlohmann_json::nlohmann_json pqxx)

examples/cohere/example.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#include <bitset>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include <cpr/cpr.h>
#include <nlohmann/json.hpp>
#include <pgvector/pqxx.hpp>
#include <pqxx/pqxx>
9+
10+
using json = nlohmann::json;
11+
12+
// https://docs.cohere.com/reference/embed
13+
std::vector<std::string> fetch_embeddings(const std::vector<std::string>& texts, const std::string& input_type, char *api_key) {
14+
std::string url = "https://api.cohere.com/v1/embed";
15+
json data = {
16+
{"texts", texts},
17+
{"model", "embed-english-v3.0"},
18+
{"input_type", input_type},
19+
{"embedding_types", {"ubinary"}}
20+
};
21+
22+
cpr::Response r = cpr::Post(
23+
cpr::Url{url},
24+
cpr::Body{data.dump()},
25+
cpr::Bearer{api_key},
26+
cpr::Header{{"Content-Type", "application/json"}}
27+
);
28+
json response = json::parse(r.text);
29+
30+
std::vector<std::string> embeddings;
31+
for (auto& v: response["embeddings"]["ubinary"]) {
32+
std::stringstream buf;
33+
for (uint8_t c : v) {
34+
std::bitset<8> b{c};
35+
buf << b.to_string();
36+
}
37+
embeddings.emplace_back(buf.str());
38+
}
39+
return embeddings;
40+
}
41+
42+
int main() {
43+
char *api_key = std::getenv("CO_API_KEY");
44+
if (!api_key) {
45+
std::cout << "Set CO_API_KEY" << std::endl;
46+
return 1;
47+
}
48+
49+
pqxx::connection conn("dbname=pgvector_example");
50+
51+
pqxx::work tx(conn);
52+
tx.exec("CREATE EXTENSION IF NOT EXISTS vector");
53+
tx.exec("DROP TABLE IF EXISTS documents");
54+
tx.exec("CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding bit(1024))");
55+
tx.commit();
56+
57+
std::vector<std::string> input = {
58+
"The dog is barking",
59+
"The cat is purring",
60+
"The bear is growling"
61+
};
62+
auto embeddings = fetch_embeddings(input, "search_document", api_key);
63+
64+
for (size_t i = 0; i < input.size(); i++) {
65+
tx.exec("INSERT INTO documents (content, embedding) VALUES ($1, $2)", {input[i], embeddings[i]});
66+
}
67+
tx.commit();
68+
69+
std::string query = "forest";
70+
auto query_embedding = fetch_embeddings({query}, "search_query", api_key)[0];
71+
pqxx::result result = tx.exec("SELECT content FROM documents ORDER BY embedding <~> $1 LIMIT 5", pqxx::params{query_embedding});
72+
for (auto const& row : result) {
73+
std::cout << row[0].c_str() << std::endl;
74+
}
75+
76+
return 0;
77+
}

0 commit comments

Comments
 (0)