File tree Expand file tree Collapse file tree 2 files changed +3
-2
lines changed Expand file tree Collapse file tree 2 files changed +3
-2
lines changed Original file line number Diff line number Diff line change @@ -128,7 +128,7 @@ option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cu
128
128
option (GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF )
129
129
set (GGML_CUDA_DMMV_X "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels" )
130
130
set (GGML_CUDA_MMV_Y "1" CACHE STRING "ggml: y block size for mmv CUDA kernels" )
131
- option (GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF )
131
+ option (GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" ON )
132
132
set (GGML_CUDA_KQUANTS_ITER "2" CACHE STRING
133
133
"ggml: iters./thread per block for Q2_K/Q6_K" )
134
134
set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
Original file line number Diff line number Diff line change @@ -2,6 +2,7 @@ include(CheckCXXCompilerFlag)
2
2
3
3
unset (GGML_CDEF_PUBLIC )
4
4
5
+ add_compile_definitions (LLAMA_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES} )
5
6
add_compile_definitions (GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES} )
6
7
7
8
# enable libstdc++ assertions for debug builds
@@ -287,7 +288,7 @@ if (GGML_CUDA)
287
288
# 61 == integer CUDA intrinsics
288
289
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
289
290
if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16 )
290
- set (CMAKE_CUDA_ARCHITECTURES "60;61;70;75;86" )
291
+ set (CMAKE_CUDA_ARCHITECTURES "52;60;61;70;75;86" )
291
292
else ()
292
293
set (CMAKE_CUDA_ARCHITECTURES "52;61;70;75;86" )
293
294
#set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
You can’t perform that action at this time.
0 commit comments