@@ -479,8 +479,8 @@ static void quantize_row_q4_0_reference(const float * restrict x, void * restric
479
479
const float v0 = x [i * QK + l + 0 ]* id ;
480
480
const float v1 = x [i * QK + l + 1 ]* id ;
481
481
482
- const uint8_t vi0 = (( int8_t ) ( round ( v0 )) ) + 8 ;
483
- const uint8_t vi1 = (( int8_t ) ( round ( v1 )) ) + 8 ;
482
+ const uint8_t vi0 = (int8_t )roundf ( v0 ) + 8 ;
483
+ const uint8_t vi1 = (int8_t )roundf ( v1 ) + 8 ;
484
484
485
485
assert (vi0 >= 0 && vi0 < 16 );
486
486
assert (vi1 >= 0 && vi1 < 16 );
@@ -747,8 +747,8 @@ void quantize_row_q4_1(const float * restrict x, void * restrict y, int k) {
747
747
const float v0 = (x [i * QK + l + 0 ] - min )* id ;
748
748
const float v1 = (x [i * QK + l + 1 ] - min )* id ;
749
749
750
- const uint8_t vi0 = round (v0 );
751
- const uint8_t vi1 = round (v1 );
750
+ const uint8_t vi0 = roundf (v0 );
751
+ const uint8_t vi1 = roundf (v1 );
752
752
753
753
assert (vi0 >= 0 && vi0 < 16 );
754
754
assert (vi1 >= 0 && vi1 < 16 );
@@ -1371,7 +1371,7 @@ inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, co
1371
1371
// element-wise division: z[i] = x[i] / y[i] for every i in [0, n)
// no check is made for zero divisors; the result follows the platform's float division
inline static void ggml_vec_div_f32(const int n, float * z, const float * x, const float * y) {
    int idx = 0;
    while (idx < n) {
        const float num = x[idx];
        const float den = y[idx];
        z[idx] = num/den;
        idx++;
    }
}
1372
1372
1373
1373
inline static void ggml_vec_dot_f32 (const int n , float * restrict s , const float * restrict x , const float * restrict y ) {
1374
- ggml_float sumf = 0.0 ;
1374
+ float sumf = 0.0f ;
1375
1375
1376
1376
#ifdef GGML_SIMD
1377
1377
const int np = (n & ~(GGML_F32_STEP - 1 ));
@@ -1449,7 +1449,7 @@ static inline __m512 dot_q4_0_oneblock_avx512(
1449
1449
#endif
1450
1450
1451
1451
inline static void ggml_vec_dot_f16 (const int n , float * restrict s , ggml_fp16_t * restrict x , ggml_fp16_t * restrict y ) {
1452
- ggml_float sumf = 0.0 ;
1452
+ float sumf = 0.0f ;
1453
1453
1454
1454
#if defined(GGML_SIMD )
1455
1455
const int np = (n & ~(GGML_F16_STEP - 1 ));
@@ -1934,7 +1934,7 @@ inline static void ggml_vec_dot_q4_1(const int n, float * restrict s, const void
1934
1934
// compute GGML_VEC_DOT_UNROLL dot products at once
1935
1935
// xs - x row stride in bytes
1936
1936
inline static void ggml_vec_dot_f16_unroll (const int n , const int xs , float * restrict s , void * restrict xv , ggml_fp16_t * restrict y ) {
1937
- ggml_float sumf [GGML_VEC_DOT_UNROLL ] = { 0.0 };
1937
+ float sumf [GGML_VEC_DOT_UNROLL ] = { 0.0f };
1938
1938
1939
1939
ggml_fp16_t * restrict x [GGML_VEC_DOT_UNROLL ];
1940
1940
@@ -2208,19 +2208,19 @@ inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
2208
2208
#endif
2209
2209
}
2210
2210
2211
- inline static void ggml_vec_norm_f32 (const int n , float * s , const float * x ) { ggml_vec_dot_f32 (n , s , x , x ); * s = sqrt (* s ); }
2211
+ inline static void ggml_vec_norm_f32 (const int n , float * s , const float * x ) { ggml_vec_dot_f32 (n , s , x , x ); * s = sqrtf (* s ); }
2212
2212
// element-wise square: y[i] = x[i] * x[i] for every i in [0, n)
inline static void ggml_vec_sqr_f32(const int n, float * y, const float * x) {
    int idx = 0;
    while (idx < n) {
        const float v = x[idx];
        y[idx] = v*v;
        idx++;
    }
}
2213
- inline static void ggml_vec_sqrt_f32 (const int n , float * y , const float * x ) { for (int i = 0 ; i < n ; ++ i ) y [i ] = sqrt (x [i ]); }
2213
+ inline static void ggml_vec_sqrt_f32 (const int n , float * y , const float * x ) { for (int i = 0 ; i < n ; ++ i ) y [i ] = sqrtf (x [i ]); }
2214
2214
// element-wise absolute value: y[i] = fabsf(x[i]) for every i in [0, n)
inline static void ggml_vec_abs_f32(const int n, float * y, const float * x) {
    int idx = 0;
    while (idx < n) {
        const float v = x[idx];
        y[idx] = fabsf(v);
        idx++;
    }
}
2215
2215
// element-wise sign: y[i] is 1 for positive x[i], -1 for negative x[i], 0 otherwise
// (anything that compares neither greater nor less than zero falls into the 0 case)
inline static void ggml_vec_sgn_f32(const int n, float * y, const float * x) {
    int idx = 0;
    while (idx < n) {
        const float v = x[idx];
        float sign;
        if (v > 0.f) {
            sign = 1.f;
        } else if (v < 0.f) {
            sign = -1.f;
        } else {
            sign = 0.f;
        }
        y[idx] = sign;
        idx++;
    }
}
2216
2216
// element-wise step: y[i] is 1 when x[i] is strictly positive, 0 otherwise
inline static void ggml_vec_step_f32(const int n, float * y, const float * x) {
    int idx = 0;
    while (idx < n) {
        const float v = x[idx];
        if (v > 0.f) {
            y[idx] = 1.f;
        } else {
            y[idx] = 0.f;
        }
        idx++;
    }
}
2217
2217
// element-wise ReLU: y[i] = x[i] when x[i] is strictly positive, 0 otherwise
inline static void ggml_vec_relu_f32(const int n, float * y, const float * x) {
    int idx = 0;
    while (idx < n) {
        // read the input once before storing, so the result is the same when y and x overlap
        const float v = x[idx];
        if (v > 0.f) {
            y[idx] = v;
        } else {
            y[idx] = 0.f;
        }
        idx++;
    }
}
2218
2218
2219
- static const ggml_float GELU_COEF_A = 0.044715 ;
2220
- static const ggml_float SQRT_2_OVER_PI = 0.79788456080286535587989211986876 ;
2219
+ static const float GELU_COEF_A = 0.044715f ;
2220
+ static const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f ;
2221
2221
2222
2222
inline static float ggml_gelu_f32 (float x ) {
2223
- return 0.5 * x * (1.0 + tanh (SQRT_2_OVER_PI * x * (1.0 + GELU_COEF_A * x * x )));
2223
+ return 0.5f * x * (1.0f + tanhf (SQRT_2_OVER_PI * x * (1.0f + GELU_COEF_A * x * x )));
2224
2224
}
2225
2225
2226
2226
inline static void ggml_vec_gelu_f16 (const int n , ggml_fp16_t * y , const ggml_fp16_t * x ) {
@@ -2249,7 +2249,7 @@ inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
2249
2249
2250
2250
// Sigmoid Linear Unit (SiLU) function
2251
2251
inline static float ggml_silu_f32 (float x ) {
2252
- return x /(1.0 + exp (- x ));
2252
+ return x /(1.0f + expf (- x ));
2253
2253
}
2254
2254
2255
2255
inline static void ggml_vec_silu_f16 (const int n , ggml_fp16_t * y , const ggml_fp16_t * x ) {
@@ -2280,7 +2280,7 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
2280
2280
#ifndef GGML_USE_ACCELERATE
2281
2281
ggml_float sum = 0.0 ;
2282
2282
for (int i = 0 ; i < n ; ++ i ) {
2283
- sum += x [i ];
2283
+ sum += ( ggml_float ) x [i ];
2284
2284
}
2285
2285
* s = sum ;
2286
2286
#else
@@ -2290,7 +2290,7 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
2290
2290
2291
2291
inline static void ggml_vec_max_f32 (const int n , float * s , const float * x ) {
2292
2292
#ifndef GGML_USE_ACCELERATE
2293
- ggml_float max = - INFINITY ;
2293
+ float max = - INFINITY ;
2294
2294
for (int i = 0 ; i < n ; ++ i ) {
2295
2295
max = MAX (max , x [i ]);
2296
2296
}
@@ -2300,7 +2300,10 @@ inline static void ggml_vec_max_f32(const int n, float * s, const float * x) {
2300
2300
#endif
2301
2301
}
2302
2302
2303
- inline static void ggml_vec_norm_inv_f32 (const int n , float * s , const float * x ) { ggml_vec_norm_f32 (n , s , x ); * s = 1. /(* s ); }
2303
+ inline static void ggml_vec_norm_inv_f32 (const int n , float * s , const float * x ) {
2304
+ ggml_vec_norm_f32 (n , s , x );
2305
+ * s = 1.f /(* s );
2306
+ }
2304
2307
2305
2308
//
2306
2309
// logging
@@ -2695,7 +2698,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
2695
2698
const float f = table_f32_f16 [i ] = GGML_COMPUTE_FP16_TO_FP32 (ii );
2696
2699
table_gelu_f16 [i ] = GGML_FP32_TO_FP16 (ggml_gelu_f32 (f ));
2697
2700
table_silu_f16 [i ] = GGML_FP32_TO_FP16 (ggml_silu_f32 (f ));
2698
- table_exp_f16 [i ] = GGML_FP32_TO_FP16 (exp (f ));
2701
+ table_exp_f16 [i ] = GGML_FP32_TO_FP16 (expf (f ));
2699
2702
}
2700
2703
2701
2704
const uint64_t t_end = ggml_time_us (); UNUSED (t_end );
@@ -5737,31 +5740,32 @@ static void ggml_compute_forward_norm_f32(
5737
5740
const size_t nb2 = dst -> nb [2 ];
5738
5741
const size_t nb3 = dst -> nb [3 ];
5739
5742
5740
- const ggml_float eps = 1e-5f ; // TODO: make this a parameter
5743
+ const float eps = 1e-5f ; // TODO: make this a parameter
5741
5744
5742
5745
// TODO: optimize
5743
5746
for (int i03 = 0 ; i03 < ne03 ; i03 ++ ) {
5744
5747
for (int i02 = 0 ; i02 < ne02 ; i02 ++ ) {
5745
5748
for (int i01 = ith ; i01 < ne01 ; i01 += nth ) {
5746
5749
const float * x = (float * ) ((char * ) src0 -> data + i01 * nb01 + i02 * nb02 + i03 * nb03 );
5747
5750
5748
- ggml_float mean = 0.0 ;
5751
+ ggml_float sum = 0.0 ;
5749
5752
for (int i00 = 0 ; i00 < ne00 ; i00 ++ ) {
5750
- mean += x [i00 ];
5753
+ sum += ( ggml_float ) x [i00 ];
5751
5754
}
5752
5755
5753
- mean /= ne00 ;
5756
+ float mean = sum / ne00 ;
5754
5757
5755
5758
float * y = (float * ) ((char * ) dst -> data + i01 * nb1 + i02 * nb2 + i03 * nb3 );
5756
5759
5757
5760
ggml_float sum2 = 0.0 ;
5758
5761
for (int i00 = 0 ; i00 < ne00 ; i00 ++ ) {
5759
- ggml_float v = x [i00 ] - mean ;
5762
+ float v = x [i00 ] - mean ;
5760
5763
y [i00 ] = v ;
5761
- sum2 += v * v ;
5764
+ sum2 += ( ggml_float )( v * v ) ;
5762
5765
}
5763
5766
5764
- const float scale = 1.0 /sqrt (sum2 /ne00 + eps );
5767
+ float variance = sum2 /ne00 ;
5768
+ const float scale = 1.0f /sqrtf (variance + eps );
5765
5769
5766
5770
ggml_vec_scale_f32 (ne00 , y , scale );
5767
5771
}
@@ -5819,20 +5823,20 @@ static void ggml_compute_forward_rms_norm_f32(
5819
5823
const size_t nb2 = dst -> nb [2 ];
5820
5824
const size_t nb3 = dst -> nb [3 ];
5821
5825
5822
- const ggml_float eps = 1e-6f ; // TODO: make this a parameter
5826
+ const float eps = 1e-6f ; // TODO: make this a parameter
5823
5827
5824
5828
// TODO: optimize
5825
5829
for (int i03 = 0 ; i03 < ne03 ; i03 ++ ) {
5826
5830
for (int i02 = 0 ; i02 < ne02 ; i02 ++ ) {
5827
5831
for (int i01 = ith ; i01 < ne01 ; i01 += nth ) {
5828
5832
const float * x = (float * ) ((char * ) src0 -> data + i01 * nb01 + i02 * nb02 + i03 * nb03 );
5829
5833
5830
- ggml_float mean = 0.0 ;
5834
+ ggml_float sum = 0.0 ;
5831
5835
for (int i00 = 0 ; i00 < ne00 ; i00 ++ ) {
5832
- mean += x [i00 ] * x [i00 ];
5836
+ sum += ( ggml_float )( x [i00 ] * x [i00 ]) ;
5833
5837
}
5834
5838
5835
- mean /= ne00 ;
5839
+ float mean = sum / ne00 ;
5836
5840
5837
5841
float * y = (float * ) ((char * ) dst -> data + i01 * nb1 + i02 * nb2 + i03 * nb3 );
5838
5842
@@ -5841,7 +5845,7 @@ static void ggml_compute_forward_rms_norm_f32(
5841
5845
// y[i00] = x[i00];
5842
5846
// }
5843
5847
5844
- const float scale = 1.0 / sqrt (mean + eps );
5848
+ const float scale = 1.0f / sqrtf (mean + eps );
5845
5849
5846
5850
ggml_vec_scale_f32 (ne00 , y , scale );
5847
5851
}
@@ -7407,12 +7411,12 @@ static void ggml_compute_forward_soft_max_f32(
7407
7411
ggml_fp16_t s = GGML_FP32_TO_FP16 (p [i ] - max );
7408
7412
memcpy (& scvt , & s , sizeof (scvt ));
7409
7413
const float val = GGML_FP16_TO_FP32 (table_exp_f16 [scvt ]);
7410
- sum += val ;
7414
+ sum += ( ggml_float ) val ;
7411
7415
p [i ] = val ;
7412
7416
}
7413
7417
}
7414
7418
7415
- assert (sum > 0.0f );
7419
+ assert (sum > 0.0 );
7416
7420
7417
7421
sum = 1.0 /sum ;
7418
7422
ggml_vec_scale_f32 (nc , p , sum );
@@ -7496,8 +7500,8 @@ static void ggml_compute_forward_rope_f32(
7496
7500
const float * const src = (float * )((char * ) src0 -> data + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0 );
7497
7501
float * dst_data = (float * )((char * ) dst -> data + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0 );
7498
7502
7499
- double x0 = src [0 ];
7500
- double x1 = src [1 ];
7503
+ double x0 = ( double ) src [0 ];
7504
+ double x1 = ( double ) src [1 ];
7501
7505
7502
7506
dst_data [0 ] = x0 * cos_theta - x1 * sin_theta ;
7503
7507
dst_data [1 ] = x0 * sin_theta + x1 * cos_theta ;
@@ -7552,8 +7556,8 @@ static void ggml_compute_forward_rope_f16(
7552
7556
const ggml_fp16_t * const src = (ggml_fp16_t * )((char * ) src0 -> data + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0 );
7553
7557
ggml_fp16_t * dst_data = (ggml_fp16_t * )((char * ) dst -> data + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0 );
7554
7558
7555
- double x0 = ggml_fp16_to_fp32 (src [0 ]);
7556
- double x1 = ggml_fp16_to_fp32 (src [1 ]);
7559
+ double x0 = ( double ) ggml_fp16_to_fp32 (src [0 ]);
7560
+ double x1 = ( double ) ggml_fp16_to_fp32 (src [1 ]);
7557
7561
7558
7562
dst_data [0 ] = ggml_fp32_to_fp16 (x0 * cos_theta - x1 * sin_theta );
7559
7563
dst_data [1 ] = ggml_fp32_to_fp16 (x0 * sin_theta + x1 * cos_theta );
@@ -8229,7 +8233,7 @@ static void ggml_compute_forward_flash_attn_f32(
8229
8233
const int ir0 = dr * ith ;
8230
8234
const int ir1 = MIN (ir0 + dr , nr );
8231
8235
8232
- const float scale = 1.0 / sqrt (( double ) D );
8236
+ const float scale = 1.0f / sqrtf ( D );
8233
8237
8234
8238
//printf("P=%d N=%d D=%d ir0=%d ir1=%d scale = %f\n", P, N, D, ir0, ir1, scale);
8235
8239
@@ -8276,7 +8280,7 @@ static void ggml_compute_forward_flash_attn_f32(
8276
8280
float max = - INFINITY ;
8277
8281
ggml_vec_max_f32 (M , & max , S );
8278
8282
8279
- float sum = 0.0f ;
8283
+ ggml_float sum = 0.0 ;
8280
8284
{
8281
8285
#ifdef GGML_SOFT_MAX_ACCELERATE
8282
8286
max = - max ;
@@ -8297,7 +8301,7 @@ static void ggml_compute_forward_flash_attn_f32(
8297
8301
ggml_fp16_t s = GGML_FP32_TO_FP16 (SS [j ] - max );
8298
8302
memcpy (& scvt [j ], & s , sizeof (uint16_t ));
8299
8303
const float val = GGML_FP16_TO_FP32 (table_exp_f16 [scvt [j ]]);
8300
- sump [j ] += val ;
8304
+ sump [j ] += ( ggml_float ) val ;
8301
8305
SS [j ] = val ;
8302
8306
}
8303
8307
}
@@ -8309,7 +8313,7 @@ static void ggml_compute_forward_flash_attn_f32(
8309
8313
#endif
8310
8314
}
8311
8315
8312
- assert (sum > 0.0f );
8316
+ assert (sum > 0.0 );
8313
8317
8314
8318
sum = 1.0 /sum ;
8315
8319
ggml_vec_scale_f32 (M , S , sum );
@@ -8438,7 +8442,7 @@ static void ggml_compute_forward_flash_attn_f16(
8438
8442
const int ir0 = dr * ith ;
8439
8443
const int ir1 = MIN (ir0 + dr , nr );
8440
8444
8441
- const float scale = 1.0 / sqrt (( double ) D );
8445
+ const float scale = 1.0f / sqrtf ( D );
8442
8446
8443
8447
//printf("P=%d N=%d D=%d ir0=%d ir1=%d scale = %f\n", P, N, D, ir0, ir1, scale);
8444
8448
@@ -8502,7 +8506,7 @@ static void ggml_compute_forward_flash_attn_f16(
8502
8506
float max = - INFINITY ;
8503
8507
ggml_vec_max_f32 (M , & max , S );
8504
8508
8505
- float sum = 0.0f ;
8509
+ ggml_float sum = 0.0 ;
8506
8510
{
8507
8511
#ifdef GGML_SOFT_MAX_ACCELERATE
8508
8512
max = - max ;
@@ -8523,7 +8527,7 @@ static void ggml_compute_forward_flash_attn_f16(
8523
8527
ggml_fp16_t s = GGML_FP32_TO_FP16 (SS [j ] - max );
8524
8528
memcpy (& scvt [j ], & s , sizeof (uint16_t ));
8525
8529
const float val = GGML_FP16_TO_FP32 (table_exp_f16 [scvt [j ]]);
8526
- sump [j ] += val ;
8530
+ sump [j ] += ( ggml_float ) val ;
8527
8531
SS [j ] = val ;
8528
8532
}
8529
8533
}
@@ -8535,7 +8539,7 @@ static void ggml_compute_forward_flash_attn_f16(
8535
8539
#endif
8536
8540
}
8537
8541
8538
- assert (sum > 0.0f );
8542
+ assert (sum > 0.0 );
8539
8543
8540
8544
sum = 1.0 /sum ;
8541
8545
ggml_vec_scale_f32 (M , S , sum );
@@ -10066,7 +10070,7 @@ label=\"%d [%d, %d] | <x>%s",
10066
10070
fprintf (fp , " \"%p\" [ \
10067
10071
style = filled; fillcolor = %s; shape = record; \
10068
10072
label=\"<x>%.1e\"; ]\n" ,
10069
- (void * ) node , color , ggml_get_f32_1d (node , 0 ));
10073
+ (void * ) node , color , ( double ) ggml_get_f32_1d (node , 0 ));
10070
10074
} else {
10071
10075
fprintf (fp , " \"%p\" [ \
10072
10076
style = filled; fillcolor = %s; shape = record; \
@@ -10304,7 +10308,7 @@ static enum ggml_opt_result ggml_opt_adam(
10304
10308
if (params .past <= t ) {
10305
10309
const float rate = (pf [t %params .past ] - fx )/fx ;
10306
10310
10307
- if (fabs (rate ) < params .delta ) {
10311
+ if (fabsf (rate ) < params .delta ) {
10308
10312
return GGML_OPT_OK ;
10309
10313
}
10310
10314
}
@@ -10383,7 +10387,7 @@ static enum ggml_opt_result linesearch_backtracking(
10383
10387
const float dec = 0.5f ;
10384
10388
const float inc = 2.1f ;
10385
10389
10386
- if (* step <= 0. ) {
10390
+ if (* step <= 0.f ) {
10387
10391
return GGML_LINESEARCH_INVALID_PARAMETERS ;
10388
10392
}
10389
10393
@@ -10471,7 +10475,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
10471
10475
struct ggml_cgraph * gb ) {
10472
10476
if (params .lbfgs .linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE ||
10473
10477
params .lbfgs .linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE ) {
10474
- if (params .lbfgs .wolfe <= params .lbfgs .ftol || 1. <= params .lbfgs .wolfe ) {
10478
+ if (params .lbfgs .wolfe <= params .lbfgs .ftol || 1.f <= params .lbfgs .wolfe ) {
10475
10479
return GGML_OPT_INVALID_WOLFE ;
10476
10480
}
10477
10481
}
@@ -10592,8 +10596,8 @@ static enum ggml_opt_result ggml_opt_lbfgs(
10592
10596
10593
10597
GGML_PRINT_DEBUG ("f = %10.6f\n" , ggml_get_f32_1d (f , 0 ));
10594
10598
10595
- if (xnorm < 1.0 ) {
10596
- xnorm = 1.0 ;
10599
+ if (xnorm < 1.0f ) {
10600
+ xnorm = 1.0f ;
10597
10601
}
10598
10602
if (gnorm /xnorm <= params .lbfgs .eps ) {
10599
10603
// converged
@@ -10606,7 +10610,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
10606
10610
if (params .past <= k ) {
10607
10611
const float rate = (pf [k %params .past ] - fx )/fx ;
10608
10612
10609
- if (fabs (rate ) < params .delta ) {
10613
+ if (fabsf (rate ) < params .delta ) {
10610
10614
return GGML_OPT_OK ;
10611
10615
}
10612
10616
}
0 commit comments