
Commit 64bfe69

Activate F16

1 parent 575ebc2 · commit 64bfe69

2 files changed (+3 lines, −2 lines)


ggml/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -128,7 +128,7 @@ option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cu
 option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF)
 set (GGML_CUDA_DMMV_X "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels")
 set (GGML_CUDA_MMV_Y "1" CACHE STRING "ggml: y block size for mmv CUDA kernels")
-option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF)
+option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" ON)
 set (GGML_CUDA_KQUANTS_ITER "2" CACHE STRING
                             "ggml: iters./thread per block for Q2_K/Q6_K")
 set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
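
Note: with GGML_CUDA_F16 now ON by default, the CUDA sources are built with 16-bit floats for some intermediate calculations (per the option's description); it can still be disabled at configure time, e.g. cmake -B build -DGGML_CUDA=ON -DGGML_CUDA_F16=OFF. The sketch below is illustrative only, not the ggml kernels themselves, and assumes the CMake option is forwarded to the compiler as a GGML_CUDA_F16 preprocessor definition; it shows the usual pattern of switching an intermediate type on that define while converting through float so the code builds on any architecture.

    // Illustrative sketch (assumption, not the actual ggml-cuda code): a
    // GGML_CUDA_F16-style compile switch selecting the intermediate type.
    #include <cuda_fp16.h>

    #ifdef GGML_CUDA_F16
    typedef half  dfloat;   // hypothetical alias: FP16 intermediate values
    #else
    typedef float dfloat;   // default: FP32 intermediate values
    #endif

    __global__ void scale_rows(const dfloat * x, dfloat * y, const float v, const int n) {
        const int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i >= n) {
            return;
        }
        // Convert through float so the same code compiles for every target;
        // native half arithmetic would additionally need compute capability 5.3+.
        y[i] = (dfloat) ((float) x[i] * v);
    }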

ggml/src/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
@@ -2,6 +2,7 @@ include(CheckCXXCompilerFlag)
 
 unset(GGML_CDEF_PUBLIC)
 
+add_compile_definitions(LLAMA_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES})
 add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})
 
 # enable libstdc++ assertions for debug builds
@@ -287,7 +288,7 @@ if (GGML_CUDA)
 # 61 == integer CUDA intrinsics
 # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
 if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
-    set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75;86")
+    set(CMAKE_CUDA_ARCHITECTURES "52;60;61;70;75;86")
 else()
     set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75;86")
     #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
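
Note: the F16 branch now also targets compute capability 5.2 (sm_52, Maxwell). sm_52 supports FP16 storage and conversion but not the FP16 arithmetic intrinsics, which require compute capability 5.3 or higher, so any half-precision math compiled for the added 52 target has to fall back to FP32. A minimal sketch of that guard pattern (illustrative, not the ggml source):

    // Illustrative sketch (assumption, not the ggml kernels): guard native FP16
    // arithmetic behind __CUDA_ARCH__ so the same source still builds for sm_52.
    #include <cuda_fp16.h>

    __device__ half2 add2(const half2 a, const half2 b) {
    #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
        return __hadd2(a, b);                   // native paired FP16 add (sm_53+)
    #else
        const float2 fa = __half22float2(a);    // FP32 fallback for older targets
        const float2 fb = __half22float2(b);
        return __floats2half2_rn(fa.x + fb.x, fa.y + fb.y);
    #endif
    }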
