diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 88d7a760b..bd4be4e4c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -231,7 +231,7 @@ jobs: run: | ./install_deps.sh mkdir build && cd build - cmake -DLLAMA_NATIVE=OFF -DLLAMA_CUBLAS=ON -DLLAMA_CUBLAS=ON -DWHISPER_CUBLAS=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} .. + cmake -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON -DWHISPER_CUDA=ON -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} .. make -j $(nproc) ls -la @@ -285,7 +285,7 @@ jobs: run: | ./install_deps.sh mkdir build && cd build - cmake -DWHISPER_COREML=1 -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} .. + cmake -DLLAMA_METAL_EMBED_LIBRARY=ON -DWHISPER_COREML=1 -DNITRO_VERSION=${{ needs.set-nitro-version.outputs.version }} .. CC=gcc-8 make -j $(sysctl -n hw.ncpu) ls -la diff --git a/CMakeLists.txt b/CMakeLists.txt index 1de2f6291..b39f6297f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,13 +26,13 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build_deps/_install) # This is the critical line for installing another package -if(LLAMA_CUBLAS) +if(LLAMA_CUDA) cmake_minimum_required(VERSION 3.17) find_package(CUDAToolkit) if(CUDAToolkit_FOUND) message(STATUS "cuBLAS found") - add_compile_definitions(GGML_USE_CUBLAS) + add_compile_definitions(GGML_USE_CUDA) endif() endif() diff --git a/audio.md b/audio.md index 2d39be0d2..1f8b9c19e 100644 --- a/audio.md +++ b/audio.md @@ -6,7 +6,7 @@ ```bash mkdir build && cd build -cmake -DLLAMA_CUBLAS=ON -DWHISPER_CUBLAS=ON .. +cmake -DLLAMA_CUDA=ON -DWHISPER_CUDA=ON .. make -j$(nproc) ``` @@ -31,7 +31,7 @@ cmake --build build -j --config Release ``` mkdir -p build cd build -cmake .. -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON -DWHISPER_CUBLAS=ON -DWHISPER_SDL2=ON +cmake .. -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=ON -DWHISPER_CUDA=ON -DWHISPER_SDL2=ON cmake --build . --config Release ``` diff --git a/context/llama_server_context.h b/context/llama_server_context.h index e8349895a..34cb74c7e 100644 --- a/context/llama_server_context.h +++ b/context/llama_server_context.h @@ -1525,7 +1525,7 @@ struct llama_server_context { slot.id, slot.params.n_keep, n_left, n_discard); llama_kv_cache_seq_rm(ctx, slot.id, slot.params.n_keep + 1, slot.params.n_keep + n_discard + 1); - llama_kv_cache_seq_shift(ctx, slot.id, + llama_kv_cache_seq_add(ctx, slot.id, slot.params.n_keep + 1 + n_discard, slot.n_past, -n_discard); diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc index 021c79faa..c2bbd0d41 100644 --- a/controllers/llamaCPP.cc +++ b/controllers/llamaCPP.cc @@ -630,10 +630,6 @@ bool llamaCPP::LoadModelImpl(std::shared_ptr jsonBody) { log_set_target(llama_log_folder + "llama.log"); } // Set folder for llama log } -#ifdef GGML_USE_CUBLAS - LOG_INFO << "Setting up GGML CUBLAS PARAMS"; - params.mul_mat_q = false; -#endif // GGML_USE_CUBLAS if (params.model_alias == "unknown") { params.model_alias = params.model; } diff --git a/docs/docs/new/build-source.md b/docs/docs/new/build-source.md index 93ecb29e1..23290d779 100644 --- a/docs/docs/new/build-source.md +++ b/docs/docs/new/build-source.md @@ -69,7 +69,7 @@ Now, let's generate the build files. ```bash mkdir build && cd build - cmake -DLLAMA_CUBLAS=ON .. + cmake -DLLAMA_CUDA=ON .. ``` ## Build the Application diff --git a/examples/example-docker/cuda.Dockerfile b/examples/example-docker/cuda.Dockerfile index 184d8fc5c..ac7feb32f 100644 --- a/examples/example-docker/cuda.Dockerfile +++ b/examples/example-docker/cuda.Dockerfile @@ -12,7 +12,7 @@ RUN apt-get update && \ ./install_deps.sh && \ mkdir build && \ cd build && \ - cmake .. -DDEBUG=ON -DLLAMA_CUBLAS=ON -DLLAMA_CUDA_F16=ON -DLLAMA_CUDA_DMMV_X=64 -DLLAMA_CUDA_MMV_Y=32 && \ + cmake .. -DDEBUG=ON -DLLAMA_CUDA=ON -DLLAMA_CUDA_F16=ON -DLLAMA_CUDA_DMMV_X=64 -DLLAMA_CUDA_MMV_Y=32 && \ cmake --build . --config Release -j $(nproc) && \ apt-get remove --purge -y git cmake && \ apt-get autoremove -y && \ diff --git a/llama.cpp b/llama.cpp index 15499eb94..33a524480 160000 --- a/llama.cpp +++ b/llama.cpp @@ -1 +1 @@ -Subproject commit 15499eb94227401bdc8875da6eb85c15d37068f7 +Subproject commit 33a52448061cfd2ea44da9e6cb30b2ec22e2f6d0 diff --git a/whisper.cpp b/whisper.cpp index a0ddd8392..ac283dbce 160000 --- a/whisper.cpp +++ b/whisper.cpp @@ -1 +1 @@ -Subproject commit a0ddd8392c3427e833d893dc79b9b50f43cf8c9e +Subproject commit ac283dbce7d42735e3ed985329037bf23fe180aa