From 90df75a298fb248900c774a90966db396e88302a Mon Sep 17 00:00:00 2001 From: CameronNguyen130820 Date: Tue, 2 Apr 2024 14:47:20 +0700 Subject: [PATCH 1/6] chore: Pump llama.cpp version --- context/llama_server_context.h | 4 +++- llama.cpp | 2 +- whisper.cpp | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/context/llama_server_context.h b/context/llama_server_context.h index e8349895a..fec2b79a5 100644 --- a/context/llama_server_context.h +++ b/context/llama_server_context.h @@ -1,3 +1,5 @@ +#include +#include #include #include #include @@ -1525,7 +1527,7 @@ struct llama_server_context { slot.id, slot.params.n_keep, n_left, n_discard); llama_kv_cache_seq_rm(ctx, slot.id, slot.params.n_keep + 1, slot.params.n_keep + n_discard + 1); - llama_kv_cache_seq_shift(ctx, slot.id, + llama_kv_cache_seq_add(ctx, slot.id, slot.params.n_keep + 1 + n_discard, slot.n_past, -n_discard); diff --git a/llama.cpp b/llama.cpp index 15499eb94..33a524480 160000 --- a/llama.cpp +++ b/llama.cpp @@ -1 +1 @@ -Subproject commit 15499eb94227401bdc8875da6eb85c15d37068f7 +Subproject commit 33a52448061cfd2ea44da9e6cb30b2ec22e2f6d0 diff --git a/whisper.cpp b/whisper.cpp index a0ddd8392..ac283dbce 160000 --- a/whisper.cpp +++ b/whisper.cpp @@ -1 +1 @@ -Subproject commit a0ddd8392c3427e833d893dc79b9b50f43cf8c9e +Subproject commit ac283dbce7d42735e3ed985329037bf23fe180aa From a40ae35fdd5d4acfa594b620320f8242985b9653 Mon Sep 17 00:00:00 2001 From: CameronNguyen130820 Date: Tue, 2 Apr 2024 15:00:53 +0700 Subject: [PATCH 2/6] Remove redundant include --- context/llama_server_context.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/context/llama_server_context.h b/context/llama_server_context.h index fec2b79a5..34cb74c7e 100644 --- a/context/llama_server_context.h +++ b/context/llama_server_context.h @@ -1,5 +1,3 @@ -#include -#include #include #include #include From 5aa069ef93d52498889e22216c96079b944425f1 Mon Sep 17 00:00:00 2001 From: CameronNguyen130820 Date: Tue, 2 Apr 2024 15:21:37 +0700 Subject: [PATCH 3/6] fix embed path for macOS-silicon --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 88d7a760b..1ccd6ac93 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -285,7 +285,7 @@ jobs: run: | ./install_deps.sh mkdir build && cd build - cmake -DWHISPER_COREML=1 -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} .. + cmake -DLLAMA_METAL_EMBED_LIBRARY=ON -DWHISPER_COREML=1 -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} .. CC=gcc-8 make -j $(sysctl -n hw.ncpu) ls -la From 007537970f8bdd4531b9f8c2a5d66f501bc66372 Mon Sep 17 00:00:00 2001 From: CameronNguyen130820 Date: Tue, 2 Apr 2024 15:47:21 +0700 Subject: [PATCH 4/6] rename build flag `_CUBLAS` to `_CUDA` --- .github/workflows/build.yml | 2 +- CMakeLists.txt | 2 +- audio.md | 4 ++-- docs/docs/new/build-source.md | 2 +- examples/example-docker/cuda.Dockerfile | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1ccd6ac93..bd4be4e4c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -231,7 +231,7 @@ jobs: run: | ./install_deps.sh mkdir build && cd build - cmake -DLLAMA_NATIVE=OFF -DLLAMA_CUBLAS=ON -DLLAMA_CUBLAS=ON -DWHISPER_CUBLAS=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} .. + cmake -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DWHISPER_CUDA=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} .. make -j $(nproc) ls -la diff --git a/CMakeLists.txt b/CMakeLists.txt index 1de2f6291..47abf6c44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build_deps/_install) # This is the critical line for installing another package -if(LLAMA_CUBLAS) +if(LLAMA_CUDA) cmake_minimum_required(VERSION 3.17) find_package(CUDAToolkit) diff --git a/audio.md b/audio.md index 2d39be0d2..1f8b9c19e 100644 --- a/audio.md +++ b/audio.md @@ -6,7 +6,7 @@ ```bash mkdir build && cd build -cmake -DLLAMA_CUBLAS=ON -DWHISPER_CUBLAS=ON .. +cmake -DLLAMA_CUDA=ON -DWHISPER_CUDA=ON .. make -j$(nproc) ``` @@ -31,7 +31,7 @@ cmake --build build -j --config Release ``` mkdir -p build cd build -cmake .. -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON -DWHISPER_CUBLAS=ON -DWHISPER_SDL2=ON +cmake .. -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=ON -DWHISPER_CUDA=ON -DWHISPER_SDL2=ON cmake --build . --config Release ``` diff --git a/docs/docs/new/build-source.md b/docs/docs/new/build-source.md index 93ecb29e1..23290d779 100644 --- a/docs/docs/new/build-source.md +++ b/docs/docs/new/build-source.md @@ -69,7 +69,7 @@ Now, let's generate the build files. ```bash mkdir build && cd build - cmake -DLLAMA_CUBLAS=ON .. + cmake -DLLAMA_CUDA=ON .. ``` ## Build the Application diff --git a/examples/example-docker/cuda.Dockerfile b/examples/example-docker/cuda.Dockerfile index 184d8fc5c..ac7feb32f 100644 --- a/examples/example-docker/cuda.Dockerfile +++ b/examples/example-docker/cuda.Dockerfile @@ -12,7 +12,7 @@ RUN apt-get update && \ ./install_deps.sh && \ mkdir build && \ cd build && \ - cmake .. -DDEBUG=ON -DLLAMA_CUBLAS=ON -DLLAMA_CUDA_F16=ON -DLLAMA_CUDA_DMMV_X=64 -DLLAMA_CUDA_MMV_Y=32 && \ + cmake .. -DDEBUG=ON -DLLAMA_CUDA=ON -DLLAMA_CUDA_F16=ON -DLLAMA_CUDA_DMMV_X=64 -DLLAMA_CUDA_MMV_Y=32 && \ cmake --build . --config Release -j $(nproc) && \ apt-get remove --purge -y git cmake && \ apt-get autoremove -y && \ From 9376926ed50806c3da4ebff043499b56781e10d1 Mon Sep 17 00:00:00 2001 From: CameronNguyen130820 Date: Tue, 2 Apr 2024 16:30:47 +0700 Subject: [PATCH 5/6] cleanup unused mul_mat_q --- controllers/llamaCPP.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc index 021c79faa..c2bbd0d41 100644 --- a/controllers/llamaCPP.cc +++ b/controllers/llamaCPP.cc @@ -630,10 +630,6 @@ bool llamaCPP::LoadModelImpl(std::shared_ptr jsonBody) { log_set_target(llama_log_folder + "llama.log"); } // Set folder for llama log } -#ifdef GGML_USE_CUBLAS - LOG_INFO << "Setting up GGML CUBLAS PARAMS"; - params.mul_mat_q = false; -#endif // GGML_USE_CUBLAS if (params.model_alias == "unknown") { params.model_alias = params.model; } From a6e697e0a9821a2fe50c93ddca4a00189eecbec0 Mon Sep 17 00:00:00 2001 From: CameronNguyen130820 Date: Tue, 2 Apr 2024 18:07:29 +0700 Subject: [PATCH 6/6] rename build flag `_CUBLAS` to `_CUDA` --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 47abf6c44..b39f6297f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ if(LLAMA_CUDA) find_package(CUDAToolkit) if(CUDAToolkit_FOUND) message(STATUS "cuBLAS found") - add_compile_definitions(GGML_USE_CUBLAS) + add_compile_definitions(GGML_USE_CUDA) endif() endif()