@@ -479,8 +479,8 @@ static void quantize_row_q4_0_reference(const float * restrict x, void * restric
479
479
const float v0 = x [i * QK + l + 0 ]* id ;
480
480
const float v1 = x [i * QK + l + 1 ]* id ;
481
481
482
- const uint8_t vi0 = (( int8_t ) ( round ( v0 )) ) + 8 ;
483
- const uint8_t vi1 = (( int8_t ) ( round ( v1 )) ) + 8 ;
482
+ const uint8_t vi0 = (int8_t )roundf ( v0 ) + 8 ;
483
+ const uint8_t vi1 = (int8_t )roundf ( v1 ) + 8 ;
484
484
485
485
assert (vi0 >= 0 && vi0 < 16 );
486
486
assert (vi1 >= 0 && vi1 < 16 );
@@ -747,8 +747,8 @@ void quantize_row_q4_1(const float * restrict x, void * restrict y, int k) {
747
747
const float v0 = (x [i * QK + l + 0 ] - min )* id ;
748
748
const float v1 = (x [i * QK + l + 1 ] - min )* id ;
749
749
750
- const uint8_t vi0 = round (v0 );
751
- const uint8_t vi1 = round (v1 );
750
+ const uint8_t vi0 = roundf (v0 );
751
+ const uint8_t vi1 = roundf (v1 );
752
752
753
753
assert (vi0 >= 0 && vi0 < 16 );
754
754
assert (vi1 >= 0 && vi1 < 16 );
@@ -2173,16 +2173,16 @@ inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
2173
2173
#endif
2174
2174
}
2175
2175
2176
- inline static void ggml_vec_norm_f32 (const int n , float * s , const float * x ) { ggml_vec_dot_f32 (n , s , x , x ); * s = sqrt (* s ); }
2176
+ inline static void ggml_vec_norm_f32 (const int n , float * s , const float * x ) { ggml_vec_dot_f32 (n , s , x , x ); * s = sqrtf (* s ); }
2177
2177
// Element-wise square: writes x[k] * x[k] into y[k] for every k in [0, n).
// Nothing is written when n <= 0.
inline static void ggml_vec_sqr_f32(const int n, float * y, const float * x) {
    int k = 0;
    while (k < n) {
        const float v = x[k];
        y[k] = v * v;
        k++;
    }
}
2178
- inline static void ggml_vec_sqrt_f32 (const int n , float * y , const float * x ) { for (int i = 0 ; i < n ; ++ i ) y [i ] = sqrt (x [i ]); }
2178
+ inline static void ggml_vec_sqrt_f32 (const int n , float * y , const float * x ) { for (int i = 0 ; i < n ; ++ i ) y [i ] = sqrtf (x [i ]); }
2179
2179
// Element-wise absolute value: y[k] = fabsf(x[k]) for every k in [0, n).
// Nothing is written when n <= 0.
inline static void ggml_vec_abs_f32(const int n, float * y, const float * x) {
    int k = 0;
    while (k < n) {
        const float v = x[k];
        y[k] = fabsf(v);
        k++;
    }
}
2180
2180
// Element-wise sign: y[k] is 1.f when x[k] > 0, -1.f when x[k] < 0,
// and 0.f otherwise (which includes any input that compares neither way).
inline static void ggml_vec_sgn_f32(const int n, float * y, const float * x) {
    int k = 0;
    while (k < n) {
        const float v = x[k];
        float sign;
        if (v > 0.f) {
            sign = 1.f;
        } else if (v < 0.f) {
            sign = -1.f;
        } else {
            sign = 0.f;
        }
        y[k] = sign;
        k++;
    }
}
2181
2181
// Element-wise step: y[k] is 1.f when x[k] > 0 and 0.f otherwise,
// for every k in [0, n).
inline static void ggml_vec_step_f32(const int n, float * y, const float * x) {
    int k = 0;
    while (k < n) {
        float out = 0.f;
        if (x[k] > 0.f) {
            out = 1.f;
        }
        y[k] = out;
        k++;
    }
}
2182
2182
// Element-wise ReLU: y[k] is x[k] when x[k] > 0 and 0.f otherwise,
// for every k in [0, n).
inline static void ggml_vec_relu_f32(const int n, float * y, const float * x) {
    int k = 0;
    while (k < n) {
        const float v = x[k];
        if (v > 0.f) {
            y[k] = v;
        } else {
            y[k] = 0.f;
        }
        k++;
    }
}
2183
2183
2184
- static const float GELU_COEF_A = 0.044715 ;
2185
- static const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876 ;
2184
+ static const float GELU_COEF_A = 0.044715f ;
2185
+ static const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f ;
2186
2186
2187
2187
inline static float ggml_gelu_f32 (float x ) {
2188
2188
return 0.5f * x * (1.0f + tanhf (SQRT_2_OVER_PI * x * (1.0f + GELU_COEF_A * x * x )));
@@ -7565,8 +7565,8 @@ static void ggml_compute_forward_rope_f32(
7565
7565
const float * const src = (float * )((char * ) src0 -> data + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0 );
7566
7566
float * dst_data = (float * )((char * ) dst -> data + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0 );
7567
7567
7568
- double x0 = src [0 ];
7569
- double x1 = src [1 ];
7568
+ double x0 = ( double ) src [0 ];
7569
+ double x1 = ( double ) src [1 ];
7570
7570
7571
7571
dst_data [0 ] = x0 * cos_theta - x1 * sin_theta ;
7572
7572
dst_data [1 ] = x0 * sin_theta + x1 * cos_theta ;
@@ -7621,8 +7621,8 @@ static void ggml_compute_forward_rope_f16(
7621
7621
const ggml_fp16_t * const src = (ggml_fp16_t * )((char * ) src0 -> data + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0 );
7622
7622
ggml_fp16_t * dst_data = (ggml_fp16_t * )((char * ) dst -> data + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0 );
7623
7623
7624
- double x0 = ggml_fp16_to_fp32 (src [0 ]);
7625
- double x1 = ggml_fp16_to_fp32 (src [1 ]);
7624
+ double x0 = ( double ) ggml_fp16_to_fp32 (src [0 ]);
7625
+ double x1 = ( double ) ggml_fp16_to_fp32 (src [1 ]);
7626
7626
7627
7627
dst_data [0 ] = ggml_fp32_to_fp16 (x0 * cos_theta - x1 * sin_theta );
7628
7628
dst_data [1 ] = ggml_fp32_to_fp16 (x0 * sin_theta + x1 * cos_theta );
@@ -8298,7 +8298,7 @@ static void ggml_compute_forward_flash_attn_f32(
8298
8298
const int ir0 = dr * ith ;
8299
8299
const int ir1 = MIN (ir0 + dr , nr );
8300
8300
8301
- const float scale = 1.0 / sqrt (( double ) D );
8301
+ const float scale = 1.0f / sqrtf ( D );
8302
8302
8303
8303
//printf("P=%d N=%d D=%d ir0=%d ir1=%d scale = %f\n", P, N, D, ir0, ir1, scale);
8304
8304
@@ -8507,7 +8507,7 @@ static void ggml_compute_forward_flash_attn_f16(
8507
8507
const int ir0 = dr * ith ;
8508
8508
const int ir1 = MIN (ir0 + dr , nr );
8509
8509
8510
- const float scale = 1.0 / sqrt (( double ) D );
8510
+ const float scale = 1.0f / sqrtf ( D );
8511
8511
8512
8512
//printf("P=%d N=%d D=%d ir0=%d ir1=%d scale = %f\n", P, N, D, ir0, ir1, scale);
8513
8513
0 commit comments