NOTE: this patch was re-flowed from a copy in which all line breaks and
indentation had been collapsed. Indentation and blank context lines are
inferred so that every hunk matches its "@@" line counts; check them against
the target tree before applying. The "#include" operand in ggml-common.h was
missing and is restored as <assert.h>, the header named in the adjacent comment.

diff --git a/ggml/src/ggml-common.h b/ggml/src/ggml-common.h
index 050161393456e..cc79f159805e3 100644
--- a/ggml/src/ggml-common.h
+++ b/ggml/src/ggml-common.h
@@ -59,8 +59,13 @@ typedef sycl::half2 ggml_half2;
 #ifndef __cplusplus
 #ifndef static_assert
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
-#define static_assert(cond, msg) _Static_assert(cond, msg)
-#else
+    #if defined(_MSC_VER)
+    // MS defines static_assert in assert.h (see -Wmicrosoft-static-assert)
+    #include <assert.h>
+    #else // Not msft
+    #define static_assert(cond, msg) _Static_assert(cond, msg)
+    #endif
+#else // Older C
 #define static_assert(cond, msg) struct global_scope_noop_trick
 #endif
 #endif
diff --git a/ggml/src/ggml-cpu-impl.h b/ggml/src/ggml-cpu-impl.h
index 5b45155b028f1..27a42aac844ea 100644
--- a/ggml/src/ggml-cpu-impl.h
+++ b/ggml/src/ggml-cpu-impl.h
@@ -139,7 +139,7 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
 
 #ifdef _MSC_VER
 
-typedef uint16_t ggml_fp16_internal_t;
+typedef float16_t ggml_fp16_internal_t;
 
 #define ggml_vld1q_u32(w,x,y,z) { ((w) + ((uint64_t)(x) << 32)), ((y) + ((uint64_t)(z) << 32)) }
 
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 3f01092d9f59a..00c836072c08a 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -468,7 +468,7 @@ inline static void * ggml_calloc(size_t num, size_t size) {
 #endif
 
 // floating point type used to accumulate sums
-typedef double ggml_float;
+typedef float ggml_float;
 
 #undef MIN
 #undef MAX
@@ -13937,7 +13937,7 @@ static void ggml_compute_forward_soft_max_f32(
         ggml_float sum = ggml_vec_soft_max_f32(nc, dp, wp, max);
         assert(sum > 0.0);
 
-        sum = 1.0/sum;
+        sum = 1.f/sum;
         ggml_vec_scale_f32(nc, dp, sum);
 
 #ifndef NDEBUG
@@ -16020,7 +16020,7 @@ static void ggml_compute_forward_flash_attn_back_f32(
 
                         assert(sum > 0.0);
 
-                        sum = 1.0/sum;
+                        sum = 1.f/sum;
                         ggml_vec_scale_f32(masked_begin, SM, sum);
 
                     }
@@ -17091,7 +17091,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
         ggml_vec_max_f32(nc, &max, s0);
         ggml_float sum = ggml_vec_soft_max_f32(nc, ds0, s0, max);
         assert(sum > 0.0);
-        ggml_vec_scale_f32(nc, ds0, 1.0/sum);
+        ggml_vec_scale_f32(nc, ds0, 1.f/sum);
 
         // grad(src0) = (softmax(src0) - src1) * grad(cross_entropy_loss(src0, src1)) / nr
         ggml_vec_sub_f32(nc, ds0, ds0, s1);
@@ -19414,7 +19414,7 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data);
 #include "windows.h"
 
 // TODO: support > 64 CPUs
-bool ggml_thread_apply_affinity(bool * mask) {
+static bool ggml_thread_apply_affinity(bool * mask) {
     HANDLE h = GetCurrentThread();
     uint64_t bitmask = 0ULL;
 
@@ -21081,13 +21081,13 @@ static enum ggml_opt_result ggml_opt_adam(
             float gnorm = 1.0f;
             if (gclip > 0.0f) {
                 // gradient clipping
-                ggml_float sum = 0.0;
+                double sum = 0.0;
                 for (int64_t i = 0; i < nx; ++i) {
-                    sum += (ggml_float)(g[i]*g[i]);
+                    sum += (double)(g[i]*g[i]);
                 }
-                ggml_float norm = sqrt(sum);
-                if (norm > (ggml_float) gclip) {
-                    gnorm = (float) ((ggml_float) gclip / norm);
+                ggml_float norm = (ggml_float) sqrt(sum);
+                if (norm > gclip) {
+                    gnorm = (float) (gclip / norm);
                 }
             }
             const float beta1h = alpha*sched/(1.0f - powf(beta1, opt->iter));