Skip to content

Commit 19ca39b

Browse files
committed
2 parents 15ad718 + d5df431 commit 19ca39b

47 files changed

Lines changed: 6848 additions & 2634 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/build-and-release.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
steps:
1717
- uses: actions/checkout@v3
1818
with:
19-
submodules: "true"
19+
submodules: "recursive"
2020

2121
# Used to host cibuildwheel
2222
- uses: actions/setup-python@v3
@@ -48,7 +48,7 @@ jobs:
4848
steps:
4949
- uses: actions/checkout@v3
5050
with:
51-
submodules: "true"
51+
submodules: "recursive"
5252
- uses: actions/setup-python@v3
5353
with:
5454
python-version: "3.8"

.github/workflows/build-docker.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
- name: Checkout
1515
uses: actions/checkout@v3
1616
with:
17-
submodules: "true"
17+
submodules: "recursive"
1818

1919
- name: Set up QEMU
2020
uses: docker/setup-qemu-action@v2

.github/workflows/publish-to-test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
steps:
1919
- uses: actions/checkout@v3
2020
with:
21-
submodules: "true"
21+
submodules: "recursive"
2222
- name: Set up Python
2323
uses: actions/setup-python@v4
2424
with:

.github/workflows/publish.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
steps:
1313
- uses: actions/checkout@v3
1414
with:
15-
submodules: "true"
15+
submodules: "recursive"
1616
- name: Set up Python
1717
uses: actions/setup-python@v4
1818
with:

.github/workflows/test.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
steps:
2020
- uses: actions/checkout@v4
2121
with:
22-
submodules: "true"
22+
submodules: "recursive"
2323
- name: Set up Python ${{ matrix.python-version }}
2424
uses: actions/setup-python@v4
2525
with:
@@ -42,7 +42,7 @@ jobs:
4242
steps:
4343
- uses: actions/checkout@v3
4444
with:
45-
submodules: "true"
45+
submodules: "recursive"
4646
- name: Set up Python ${{ matrix.python-version }}
4747
uses: actions/setup-python@v4
4848
with:
@@ -65,7 +65,7 @@ jobs:
6565
steps:
6666
- uses: actions/checkout@v3
6767
with:
68-
submodules: "true"
68+
submodules: "recursive"
6969
- name: Set up Python ${{ matrix.python-version }}
7070
uses: actions/setup-python@v4
7171
with:
@@ -85,7 +85,7 @@ jobs:
8585
# steps:
8686
# - uses: actions/checkout@v3
8787
# with:
88-
# submodules: "true"
88+
# submodules: "recursive"
8989
# - name: Set up Python 3.8
9090
# uses: actions/setup-python@v4
9191
# with:
@@ -112,7 +112,7 @@ jobs:
112112
steps:
113113
- uses: actions/checkout@v3
114114
with:
115-
submodules: "true"
115+
submodules: "recursive"
116116
- name: Set up Python 3.8
117117
uses: actions/setup-python@v4
118118
with:

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
*.local
2+
13
.python-version
24

35
.vscode/

CHANGELOG.md

Lines changed: 167 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,175 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.2.54]
11+
12+
- feat: Update llama.cpp to ggerganov/llama.cpp@cb49e0f8c906e5da49e9f6d64a57742a9a241c6a
13+
- docs: fix typo in README.md embeddings example by @iamlemec in #1232
14+
15+
## [0.2.53]
16+
17+
- feat: Update llama.cpp to ggerganov/llama.cpp@cb49e0f8c906e5da49e9f6d64a57742a9a241c6a
18+
- fix: eos/bos_token set correctly for Jinja2ChatFormatter and automatic chat formatter by @CISC in #1230
19+
20+
## [0.2.52]
21+
22+
- feat: Update llama.cpp to ggerganov/llama.cpp@a33e6a0d2a66104ea9a906bdbf8a94d050189d91
23+
- fix: Llava15ChatHandler (this function takes at least 4 arguments) by @abetlen in 8383a9e5620f5df5a88f62da16813eac200dd706
24+
25+
## [0.2.51]
26+
27+
- feat: Update llama.cpp to ggerganov/llama.cpp@c39373398803c669056304090050fe3f44b41bf9
28+
- fix: Restore type hints for low-level api by @abetlen in 19234aa0dbd0c3c87656e65dd2b064665371925b
29+
30+
## [0.2.50]
31+
32+
- docs: Update Functionary OpenAI Server Readme by @jeffrey-fong in #1193
33+
- fix: LlamaHFTokenizer now receives pre_tokens by @abetlen in 47bad30dd716443652275099fa3851811168ff4a
34+
35+
## [0.2.49]
36+
37+
- fix: module 'llama_cpp.llama_cpp' has no attribute 'c_uint8' in Llama.save_state by @abetlen in db776a885cd4c20811f22f8bd1a27ecc71dba927
38+
- feat: Auto detect Mixtral's slightly different format by @lukestanley in #1214
39+
40+
## [0.2.48]
41+
42+
- feat: Update llama.cpp to ggerganov/llama.cpp@15499eb94227401bdc8875da6eb85c15d37068f7
43+
- feat: Add Google's Gemma formatting via chat_format="gemma" by @alvarobartt in #1210
44+
- feat: support minItems/maxItems in JSON grammar converter by @nopperl in 3921e10770996d95a9eb22c8248bacef39f69365
45+
- fix: Update from_pretrained defaults to match hf_hub_download and pull to local cache folder by @abetlen in e6d6260a91b7831733f7d1f73c7af46a3e8185ed
46+
- fix: Raise exceptions when llama model or context fails to load by @abetlen in dd22010e85265ae840c76ec835d67a29ed852722
47+
- docs: Update README.md to fix pip install llama cpp server by @audip in #1187
48+
49+
## [0.2.47]
50+
51+
- feat: Update llama.cpp to ggerganov/llama.cpp@973053d8b0d04809836b3339a50f68d9c842de90
52+
53+
## [0.2.46]
54+
55+
- feat: Update llama.cpp to ggerganov/llama.cpp@ba2135ccae7462470b3865c6e41d2e1d734eac05
56+
- feat: Pull models directly from huggingface by @abetlen in #1206
57+
- feat(low-level-api): Improve API static type-safety and performance. Low level api functions are positional args only now. by @abetlen in #1205
58+
59+
## [0.2.45]
60+
61+
- feat: Update llama.cpp to ggerganov/llama.cpp@89febfed9322c8849520dc63c93ee4f5fd72556e
62+
63+
## [0.2.44]
64+
65+
- feat: Update llama.cpp to ggerganov/llama.cpp@4524290e87b8e107cc2b56e1251751546f4b9051
66+
- fix: create_embedding broken response for input type str by @abetlen in 0ce66bc080fe537590b05b24bf442480bf2dd045
67+
- fix: Use '\n' separator for EventSourceResponse by @khimaros in #1188
68+
- fix: Incorporate embedding pooling layer fixes by @iamlemec in #1194
69+
70+
## [0.2.43]
71+
72+
- feat: Update llama.cpp to ggerganov/llama.cpp@8084d554406b767d36b3250b3b787462d5dd626f
73+
- feat: Support batch embeddings by @iamlemec in #1186
74+
- fix: submodule kompute is not included in sdist by @abetlen in 7dbbfdecadebe7750be650d9409959640ff9a460
75+
- fix: Update openbuddy prompt format by @abetlen in 07a783779a62a4aac0b11161c7e0eb983ff215f8
76+
77+
## [0.2.42]
78+
79+
- feat: Update llama.cpp to ggerganov/llama.cpp@ea9c8e11436ad50719987fa23a289c74b7b40d40
80+
- fix: sample idx off-by-one error for logit_processors by @lapp0 in #1179
81+
- fix: chat formatting bugs in `chatml-function-calling` by @abetlen in 4b0e3320bd8c2c209e29978d0b21e2e471cc9ee3 and 68fb71b6a26a1e57331868f959b47ab4b87851e1
82+
83+
## [0.2.41]
84+
85+
- feat: Update llama.cpp to ggerganov/llama.cpp@895407f31b358e3d9335e847d13f033491ec8a5b
86+
- fix: Don't change order of json schema object properties in generated grammar unless prop_order is passed by @abetlen in d1822fed6b706f38bd1ff0de4dec5baaa3cf84fa
87+
88+
## [0.2.40]
89+
90+
- feat: Update llama.cpp to ggerganov/llama.cpp@3bdc4cd0f595a6096cca4a64aa75ffa8a3503465
91+
- feat: Generic chatml Function Calling using `chat_format="chatml-function-calling"` by @abetlen in #957
92+
- fix: Circular dependency preventing early Llama object free by @notwa in #1176
93+
- docs: Set the correct command for compiling with SYCL support by @akarshanbiswas in #1172
94+
- feat: use gpu backend for clip if available by @iamlemec in #1175
95+
96+
## [0.2.39]
97+
98+
- feat: Update llama.cpp to ggerganov/llama.cpp@b08f22c882a1443e6b97081f3ce718a4d1a741f8
99+
- fix: Fix destructor logging bugs by using llama_log_callback to avoid suppress_stdout_stderr by @abetlen in 59760c85eddc72dfcc1839f43760ef72c23d6874
100+
101+
## [0.2.38]
102+
103+
- feat: Update llama.cpp to ggerganov/llama.cpp@1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915
104+
- feat: Add speculative decoding by @abetlen in #1120
105+
- fix: Pass raise_exception and add_generation_prompt to jinja2 chat template by @abetlen in 078cca0361bf5a94d2cf52ed04980d20e32d6f95
106+
107+
## [0.2.37]
108+
109+
- feat: Update llama.cpp to ggerganov/llama.cpp@fea4fd4ba7f6b754ac795387b275e1a014a77bde
110+
- feat: Automatically set chat format from gguf by @abetlen in #1110
111+
112+
## [0.2.36]
113+
114+
- feat: Update llama.cpp to ggerganov/llama.cpp@2aed77eb06a329f0d82bb1c467f4244904d4073f
115+
- feat: Add mistral instruct chat format as "mistral-instruct" by @Rafaelblsilva in #799
116+
117+
## [0.2.35]
118+
119+
- feat: Update llama.cpp to ggerganov/llama.cpp@d2f650cb5b04ee2726663e79b47da5efe196ce00
120+
121+
## [0.2.34]
122+
123+
- feat: Update llama.cpp to ggerganov/llama.cpp@6db2b41a76ee78d5efdd5c3cddd5d7ad3f646855
124+
- feat: Add json schema mode by @abetlen in #1122
125+
126+
## [0.2.33]
127+
128+
- feat: Update llama.cpp to ggerganov/llama.cpp@faa3526a1eba458120987ed8269e5616385a76f4
129+
- feat(server): include llama-cpp-python version in openapi spec by @abetlen in cde7514c3d28e6d52f272614e9957208c344dde5
130+
- fix: use both eos and bos tokens as stop sequences for hf-tokenizer-config chat format. by @abetlen in 5b982d0f8c6f35242c8862ffdce00e17cea0b44f
131+
- fix: GGUF metadata KV overrides, re #1011 by @phiharri in #1116
132+
- fix: llama_log_set should be able to accept null pointer by @abetlen in c970d41a85381fd55235136f123422df0bf0c7e7
133+
134+
## [0.2.32]
135+
136+
- feat: Update llama.cpp to ggerganov/llama.cpp@504dc37be8446fb09b1ede70300250ad41be32a2
137+
- fix: from_json_schema oneof/anyof bug by @jndiogo in d3f5528ca8bcb9d69d4f27e21631e911f1fb9bfe
138+
- fix: pass chat handler not chat formatter for huggingface autotokenizer and tokenizer_config formats by @abetlen in 24f39454e91cf5dddbc4b6041aead4accc7c7a2d
139+
- feat: Add add_generation_prompt option for jinja2chatformatter by @abetlen in 7f3209b1eb4ad3260ba063801fab80a8c25a2f4c
140+
- feat: Add Jinja2ChatFormatter by @abetlen in be09318c26add8674ce494ae7cc480cce72a4146
141+
- feat: Expose gguf model metadata in metadata property by @abetlen in 5a34c57e5479e50c99aba9b38218cc48e6560b81
142+
143+
## [0.2.31]
144+
145+
- feat: Update llama.cpp to ggerganov/llama.cpp@a5cacb22b2114fd9adf61c00cbb237384d86bced
146+
- fix: Mirostat sampling now passes correct type to ctypes and tracks state during generation by @abetlen in 3babe3512cb95743108f2b595210c38ed6f1b904
147+
- fix: Python3.8 support in server by @abetlen in 141293a75b564a8699e0acba1da24d9aa1cf0ab1
148+
149+
## [0.2.30]
150+
151+
- feat: Update llama.cpp to ggerganov/llama.cpp@57e2a7a52a819883f40dada8a2edc24ecf48186b
152+
- feat(server): Add ability to load chat format from huggingface autotokenizer or tokenizer_config.json files by @abetlen in b8fc1c7d83ad4a9207c707ba1d954fe580286a01
153+
- feat: Integration of Jinja2 Templating for chat formats by @teleprint-me in #875
154+
- fix: Offload KQV by default by @abetlen in 48c3b77e6f558a9899de0e1155c7dc0c7958d8e8
155+
- fix: Support Accept text/event-stream in chat and completion endpoints, resolves #1083 by @aniljava in #1088
156+
- fix(cli): allow passing n_ctx=0 to openAI API server args to use model n_ctx_train field per #1015 by @K-Mistele in #1093
157+
158+
## [0.2.29]
159+
160+
- feat: Update llama.cpp to ggerganov/llama.cpp@4483396751c79dea540808b9cb9238245d06da2b
161+
- feat: Add split_mode option by @abetlen in 84615adbc6855c8384807c42f0130f9a1763f99d
162+
- feat: Implement GGUF metadata KV overrides by @phiharri in #1011
163+
- fix: Avoid "LookupError: unknown encoding: ascii" when open() called in a destructor by @yieldthought in #1012
164+
- fix: Fix low_level_api_chat_cpp example to match current API by @aniljava in #1086
165+
- fix: Fix Pydantic model parsing by @DeNeutoy in #1087
166+
167+
## [0.2.28]
168+
169+
- feat: Update llama.cpp to ggerganov/llama.cpp@6efb8eb30e7025b168f3fda3ff83b9b386428ad6
170+
- feat: Add ability to pass in penalize_nl param by @shankinson in #1068
171+
- fix: print_grammar to stderr by @turian in #1052
172+
10173
## [0.2.27]
11174

12175
- feat: Update llama.cpp to ggerganov/llama.cpp@b3a7c20b5c035250257d2b62851c379b159c899a
13176
- feat: Add `saiga` chat format by @femoiseev in #1050
14177
- feat: Added `chatglm3` chat format by @xaviviro in #1059
15-
- fix: Correct typo in README.md by @qeleb in (#1058)
178+
- fix: Correct typo in README.md by @qeleb in (#1058)
16179

17180
## [0.2.26]
18181

@@ -145,7 +308,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
145308

146309
## [0.2.11]
147310

148-
- Fix bug in `llama_model_params` object has no attribute `logits_all` by @abetlen in d696251fbe40015e8616ea7a7d7ad5257fd1b896
311+
- Fix bug in `llama_model_params` object has no attribute `logits_all` by @abetlen in d696251fbe40015e8616ea7a7d7ad5257fd1b896
149312

150313
## [0.2.10]
151314

@@ -333,7 +496,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
333496

334497
## [0.1.60]
335498

336-
NOTE: This release was deleted due to a bug with the packaging system that caused pip installations to fail.
499+
NOTE: This release was deleted due to a bug with the packaging system that caused pip installations to fail.
337500

338501
- Truncate max_tokens in create_completion so requested tokens doesn't exceed context size.
339502
- Temporarily disable cache for completion requests
@@ -357,4 +520,4 @@ NOTE: This release was deleted due to a bug with the packaging system that caus
357520
- (misc) Added first version of the changelog
358521
- (server) Use async routes
359522
- (python-api) Use numpy for internal buffers to reduce memory usage and improve performance.
360-
- (python-api) Performance bug in stop sequence check slowing down streaming.
523+
- (python-api) Performance bug in stop sequence check slowing down streaming.

CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ if (LLAMA_BUILD)
4646
)
4747

4848
if (LLAVA_BUILD)
49+
if (LLAMA_CUBLAS)
50+
add_compile_definitions(GGML_USE_CUBLAS)
51+
endif()
52+
53+
if (LLAMA_METAL)
54+
add_compile_definitions(GGML_USE_METAL)
55+
endif()
56+
4957
# Building llava
5058
add_subdirectory(vendor/llama.cpp/examples/llava)
5159
set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")

Makefile

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,34 @@ deps:
1010
python3 -m pip install -e ".[all]"
1111

1212
build:
13-
python3 -m pip install -e .
13+
python3 -m pip install --verbose -e .
14+
15+
build.debug:
16+
CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Debug" python3 -m pip install --verbose --config-settings=cmake.verbose=true --config-settings=logging.level=INFO --config-settings=install.strip=false --editable .
1417

1518
build.cuda:
16-
CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install -e .
19+
CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install --verbose -e .
1720

1821
build.opencl:
19-
CMAKE_ARGS="-DLLAMA_CLBLAST=on" python3 -m pip install -e .
22+
CMAKE_ARGS="-DLLAMA_CLBLAST=on" python3 -m pip install --verbose -e .
2023

2124
build.openblas:
22-
CMAKE_ARGS="-DLLAMA_CLBLAST=on" python3 -m pip install -e .
25+
CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" python3 -m pip install --verbose -e .
2326

2427
build.blis:
25-
CMAKE_ARGS="-DLLAMA_OPENBLAS=on -DLLAMA_OPENBLAS_VENDOR=blis" python3 -m pip install -e .
28+
CMAKE_ARGS="-DLLAMA_BLAS=on -DLLAMA_BLAS_VENDOR=FLAME" python3 -m pip install --verbose -e .
2629

2730
build.metal:
28-
CMAKE_ARGS="-DLLAMA_METAL=on" python3 -m pip install -e .
31+
CMAKE_ARGS="-DLLAMA_METAL=on" python3 -m pip install --verbose -e .
32+
33+
build.vulkan:
34+
CMAKE_ARGS="-DLLAMA_VULKAN=on" python3 -m pip install --verbose -e .
35+
36+
build.kompute:
37+
CMAKE_ARGS="-DLLAMA_KOMPUTE=on" python3 -m pip install --verbose -e .
38+
39+
build.sycl:
40+
CMAKE_ARGS="-DLLAMA_SYCL=on" python3 -m pip install --verbose -e .
2941

3042
build.sdist:
3143
python3 -m build --sdist

0 commit comments

Comments
 (0)