@@ -1270,12 +1270,12 @@ inline static void ggml_vec_dot_f32(const int n, float * restrict s, const float
1270
1270
1271
1271
// leftovers
1272
1272
for (int i = np ; i < n ; ++ i ) {
1273
- sumf += x [i ]* y [i ];
1273
+ sumf += ( ggml_float )( x [i ]* y [i ]) ;
1274
1274
}
1275
1275
#else
1276
1276
// scalar
1277
1277
for (int i = 0 ; i < n ; ++ i ) {
1278
- sumf += x [i ]* y [i ];
1278
+ sumf += ( ggml_float )( x [i ]* y [i ]) ;
1279
1279
}
1280
1280
#endif
1281
1281
@@ -1348,11 +1348,11 @@ inline static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t
1348
1348
1349
1349
// leftovers
1350
1350
for (int i = np ; i < n ; ++ i ) {
1351
- sumf += GGML_FP16_TO_FP32 (x [i ])* GGML_FP16_TO_FP32 (y [i ]);
1351
+ sumf += ( ggml_float )( GGML_FP16_TO_FP32 (x [i ])* GGML_FP16_TO_FP32 (y [i ]) );
1352
1352
}
1353
1353
#else
1354
1354
for (int i = 0 ; i < n ; ++ i ) {
1355
- sumf += GGML_FP16_TO_FP32 (x [i ])* GGML_FP16_TO_FP32 (y [i ]);
1355
+ sumf += ( ggml_float )( GGML_FP16_TO_FP32 (x [i ])* GGML_FP16_TO_FP32 (y [i ]) );
1356
1356
}
1357
1357
#endif
1358
1358
@@ -1845,13 +1845,13 @@ inline static void ggml_vec_dot_f16_unroll(const int n, const int xs, float * re
1845
1845
// leftovers
1846
1846
for (int i = np ; i < n ; ++ i ) {
1847
1847
for (int j = 0 ; j < GGML_VEC_DOT_UNROLL ; ++ j ) {
1848
- sumf [j ] += GGML_FP16_TO_FP32 (x [j ][i ])* GGML_FP16_TO_FP32 (y [i ]);
1848
+ sumf [j ] += ( ggml_float )( GGML_FP16_TO_FP32 (x [j ][i ])* GGML_FP16_TO_FP32 (y [i ]) );
1849
1849
}
1850
1850
}
1851
1851
#else
1852
1852
for (int i = 0 ; i < n ; ++ i ) {
1853
1853
for (int j = 0 ; j < GGML_VEC_DOT_UNROLL ; ++ j ) {
1854
- sumf [j ] += GGML_FP16_TO_FP32 (x [j ][i ])* GGML_FP16_TO_FP32 (y [i ]);
1854
+ sumf [j ] += ( ggml_float )( GGML_FP16_TO_FP32 (x [j ][i ])* GGML_FP16_TO_FP32 (y [i ]) );
1855
1855
}
1856
1856
}
1857
1857
#endif
@@ -2091,11 +2091,11 @@ inline static void ggml_vec_sgn_f32 (const int n, float * y, const float * x) {
2091
2091
// Element-wise step function: writes 1.0f to y[k] when x[k] is strictly
// positive and 0.0f otherwise (zero, negative and NaN inputs all give 0.0f).
//   n - number of elements to process
//   y - destination array, at least n floats
//   x - source array, at least n floats
inline static void ggml_vec_step_f32(const int n, float * y, const float * x) {
    for (int k = 0; k < n; ++k) {
        if (x[k] > 0.f) {
            y[k] = 1.f;
        } else {
            y[k] = 0.f;
        }
    }
}
2092
2092
// Element-wise ReLU: y[k] receives x[k] when x[k] is strictly positive,
// and 0.0f otherwise.
//   n - number of elements to process
//   y - destination array, at least n floats
//   x - source array, at least n floats
inline static void ggml_vec_relu_f32(const int n, float * y, const float * x) {
    for (int k = 0; k < n; ++k) {
        // read the input once before storing, so y may refer to the same storage as x
        const float v = x[k];
        if (v > 0.f) {
            y[k] = v;
        } else {
            y[k] = 0.f;
        }
    }
}
2093
2093
2094
- static const ggml_float GELU_COEF_A = 0.044715 ;
2095
- static const ggml_float SQRT_2_OVER_PI = 0.79788456080286535587989211986876 ;
2094
+ static const float GELU_COEF_A = 0.044715 ;
2095
+ static const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876 ;
2096
2096
2097
2097
// GELU activation for a single float, computed with the tanh-based formula
//   0.5 * x * (1 + tanh(SQRT_2_OVER_PI * x * (1 + GELU_COEF_A * x * x)))
// using the file-scope constants SQRT_2_OVER_PI and GELU_COEF_A.
// Diff view: the '-' line is the previous form (unsuffixed literals, tanh);
// the '+' line is its replacement (float-suffixed literals, tanhf).
inline static float ggml_gelu_f32 (float x ) {
2098
- return 0.5 * x * (1.0 + tanh (SQRT_2_OVER_PI * x * (1.0 + GELU_COEF_A * x * x )));
2098
+ return 0.5f * x * (1.0f + tanhf (SQRT_2_OVER_PI * x * (1.0f + GELU_COEF_A * x * x )));
2099
2099
}
2100
2100
2101
2101
inline static void ggml_vec_gelu_f16 (const int n , ggml_fp16_t * y , const ggml_fp16_t * x ) {
@@ -2124,7 +2124,7 @@ inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
2124
2124
2125
2125
// Sigmoid Linear Unit (SiLU) function
2126
2126
// Returns x / (1 + e^(-x)) for a single float input.
// Diff view: the '-' line is the previous form (unsuffixed literal, exp);
// the '+' line is its replacement (float-suffixed literal, expf).
inline static float ggml_silu_f32 (float x ) {
2127
- return x /(1.0 + exp (- x ));
2127
+ return x /(1.0f + expf (- x ));
2128
2128
}
2129
2129
2130
2130
inline static void ggml_vec_silu_f16 (const int n , ggml_fp16_t * y , const ggml_fp16_t * x ) {
@@ -2155,7 +2155,7 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
2155
2155
#ifndef GGML_USE_ACCELERATE
2156
2156
ggml_float sum = 0.0 ;
2157
2157
for (int i = 0 ; i < n ; ++ i ) {
2158
- sum += x [i ];
2158
+ sum += ( ggml_float ) x [i ];
2159
2159
}
2160
2160
* s = sum ;
2161
2161
#else
@@ -2165,7 +2165,7 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
2165
2165
2166
2166
inline static void ggml_vec_max_f32 (const int n , float * s , const float * x ) {
2167
2167
#ifndef GGML_USE_ACCELERATE
2168
- ggml_float max = - INFINITY ;
2168
+ float max = - INFINITY ;
2169
2169
for (int i = 0 ; i < n ; ++ i ) {
2170
2170
max = MAX (max , x [i ]);
2171
2171
}
@@ -2175,7 +2175,10 @@ inline static void ggml_vec_max_f32(const int n, float * s, const float * x) {
2175
2175
#endif
2176
2176
}
2177
2177
2178
// Calls ggml_vec_norm_f32(n, s, x) and then overwrites *s with the reciprocal
// of the value that call stored there. No check for *s being zero is made here.
// Diff view: the '-' line is the previous one-line definition (literal 1.);
// the '+' lines are the multi-line replacement (literal 1.f).
- inline static void ggml_vec_norm_inv_f32 (const int n , float * s , const float * x ) { ggml_vec_norm_f32 (n , s , x ); * s = 1. /(* s ); }
2178
+ inline static void ggml_vec_norm_inv_f32 (const int n , float * s , const float * x ) {
2179
+ ggml_vec_norm_f32 (n , s , x );
2180
+ * s = 1.f /(* s );
2181
+ }
2179
2182
2180
2183
//
2181
2184
// logging
@@ -5569,31 +5572,32 @@ static void ggml_compute_forward_norm_f32(
5569
5572
const size_t nb2 = dst -> nb [2 ];
5570
5573
const size_t nb3 = dst -> nb [3 ];
5571
5574
5572
- const ggml_float eps = 1e-5f ; // TODO: make this a parameter
5575
+ const float eps = 1e-5f ; // TODO: make this a parameter
5573
5576
5574
5577
// TODO: optimize
5575
5578
for (int i03 = 0 ; i03 < ne03 ; i03 ++ ) {
5576
5579
for (int i02 = 0 ; i02 < ne02 ; i02 ++ ) {
5577
5580
for (int i01 = ith ; i01 < ne01 ; i01 += nth ) {
5578
5581
const float * x = (float * ) ((char * ) src0 -> data + i01 * nb01 + i02 * nb02 + i03 * nb03 );
5579
5582
5580
- ggml_float mean = 0.0 ;
5583
+ ggml_float sum = 0.0 ;
5581
5584
for (int i00 = 0 ; i00 < ne00 ; i00 ++ ) {
5582
- mean += x [i00 ];
5585
+ sum += ( ggml_float ) x [i00 ];
5583
5586
}
5584
5587
5585
- mean /= ne00 ;
5588
+ float mean = sum / ne00 ;
5586
5589
5587
5590
float * y = (float * ) ((char * ) dst -> data + i01 * nb1 + i02 * nb2 + i03 * nb3 );
5588
5591
5589
5592
ggml_float sum2 = 0.0 ;
5590
5593
for (int i00 = 0 ; i00 < ne00 ; i00 ++ ) {
5591
- ggml_float v = x [i00 ] - mean ;
5594
+ float v = x [i00 ] - mean ;
5592
5595
y [i00 ] = v ;
5593
- sum2 += v * v ;
5596
+ sum2 += ( ggml_float )( v * v ) ;
5594
5597
}
5595
5598
5596
- const float scale = 1.0 /sqrt (sum2 /ne00 + eps );
5599
+ float variance = sum2 /ne00 ;
5600
+ const float scale = 1.0f /sqrtf (variance + eps );
5597
5601
5598
5602
ggml_vec_scale_f32 (ne00 , y , scale );
5599
5603
}
@@ -5651,20 +5655,20 @@ static void ggml_compute_forward_rms_norm_f32(
5651
5655
const size_t nb2 = dst -> nb [2 ];
5652
5656
const size_t nb3 = dst -> nb [3 ];
5653
5657
5654
- const ggml_float eps = 1e-6f ; // TODO: make this a parameter
5658
+ const float eps = 1e-6f ; // TODO: make this a parameter
5655
5659
5656
5660
// TODO: optimize
5657
5661
for (int i03 = 0 ; i03 < ne03 ; i03 ++ ) {
5658
5662
for (int i02 = 0 ; i02 < ne02 ; i02 ++ ) {
5659
5663
for (int i01 = ith ; i01 < ne01 ; i01 += nth ) {
5660
5664
const float * x = (float * ) ((char * ) src0 -> data + i01 * nb01 + i02 * nb02 + i03 * nb03 );
5661
5665
5662
- ggml_float mean = 0.0 ;
5666
+ ggml_float sum = 0.0 ;
5663
5667
for (int i00 = 0 ; i00 < ne00 ; i00 ++ ) {
5664
- mean += x [i00 ] * x [i00 ];
5668
+ sum += ( ggml_float )( x [i00 ] * x [i00 ]) ;
5665
5669
}
5666
5670
5667
- mean /= ne00 ;
5671
+ float mean = sum / ne00 ;
5668
5672
5669
5673
float * y = (float * ) ((char * ) dst -> data + i01 * nb1 + i02 * nb2 + i03 * nb3 );
5670
5674
@@ -5673,7 +5677,7 @@ static void ggml_compute_forward_rms_norm_f32(
5673
5677
// y[i00] = x[i00];
5674
5678
// }
5675
5679
5676
- const float scale = 1.0 / sqrt (mean + eps );
5680
+ const float scale = 1.0f / sqrtf (mean + eps );
5677
5681
5678
5682
ggml_vec_scale_f32 (ne00 , y , scale );
5679
5683
}
@@ -7328,12 +7332,12 @@ static void ggml_compute_forward_soft_max_f32(
7328
7332
ggml_fp16_t s = GGML_FP32_TO_FP16 (p [i ] - max );
7329
7333
memcpy (& scvt , & s , sizeof (scvt ));
7330
7334
const float val = GGML_FP16_TO_FP32 (table_exp_f16 [scvt ]);
7331
- sum += val ;
7335
+ sum += ( ggml_float ) val ;
7332
7336
p [i ] = val ;
7333
7337
}
7334
7338
}
7335
7339
7336
- assert (sum > 0.0f );
7340
+ assert (sum > 0.0 );
7337
7341
7338
7342
sum = 1.0 /sum ;
7339
7343
ggml_vec_scale_f32 (nc , p , sum );
@@ -8197,7 +8201,7 @@ static void ggml_compute_forward_flash_attn_f32(
8197
8201
float max = - INFINITY ;
8198
8202
ggml_vec_max_f32 (M , & max , S );
8199
8203
8200
- float sum = 0.0f ;
8204
+ ggml_float sum = 0.0 ;
8201
8205
{
8202
8206
#ifdef GGML_SOFT_MAX_ACCELERATE
8203
8207
max = - max ;
@@ -8218,7 +8222,7 @@ static void ggml_compute_forward_flash_attn_f32(
8218
8222
ggml_fp16_t s = GGML_FP32_TO_FP16 (SS [j ] - max );
8219
8223
memcpy (& scvt [j ], & s , sizeof (uint16_t ));
8220
8224
const float val = GGML_FP16_TO_FP32 (table_exp_f16 [scvt [j ]]);
8221
- sump [j ] += val ;
8225
+ sump [j ] += ( ggml_float ) val ;
8222
8226
SS [j ] = val ;
8223
8227
}
8224
8228
}
@@ -8230,7 +8234,7 @@ static void ggml_compute_forward_flash_attn_f32(
8230
8234
#endif
8231
8235
}
8232
8236
8233
- assert (sum > 0.0f );
8237
+ assert (sum > 0.0 );
8234
8238
8235
8239
sum = 1.0 /sum ;
8236
8240
ggml_vec_scale_f32 (M , S , sum );
@@ -8423,7 +8427,7 @@ static void ggml_compute_forward_flash_attn_f16(
8423
8427
float max = - INFINITY ;
8424
8428
ggml_vec_max_f32 (M , & max , S );
8425
8429
8426
- float sum = 0.0f ;
8430
+ ggml_float sum = 0.0 ;
8427
8431
{
8428
8432
#ifdef GGML_SOFT_MAX_ACCELERATE
8429
8433
max = - max ;
@@ -8444,7 +8448,7 @@ static void ggml_compute_forward_flash_attn_f16(
8444
8448
ggml_fp16_t s = GGML_FP32_TO_FP16 (SS [j ] - max );
8445
8449
memcpy (& scvt [j ], & s , sizeof (uint16_t ));
8446
8450
const float val = GGML_FP16_TO_FP32 (table_exp_f16 [scvt [j ]]);
8447
- sump [j ] += val ;
8451
+ sump [j ] += ( ggml_float ) val ;
8448
8452
SS [j ] = val ;
8449
8453
}
8450
8454
}
@@ -8456,7 +8460,7 @@ static void ggml_compute_forward_flash_attn_f16(
8456
8460
#endif
8457
8461
}
8458
8462
8459
- assert (sum > 0.0f );
8463
+ assert (sum > 0.0 );
8460
8464
8461
8465
sum = 1.0 /sum ;
8462
8466
ggml_vec_scale_f32 (M , S , sum );
@@ -9987,7 +9991,7 @@ label=\"%d [%d, %d] | <x>%s",
9987
9991
fprintf (fp , " \"%p\" [ \
9988
9992
style = filled; fillcolor = %s; shape = record; \
9989
9993
label=\"<x>%.1e\"; ]\n" ,
9990
- (void * ) node , color , ggml_get_f32_1d (node , 0 ));
9994
+ (void * ) node , color , ( double ) ggml_get_f32_1d (node , 0 ));
9991
9995
} else {
9992
9996
fprintf (fp , " \"%p\" [ \
9993
9997
style = filled; fillcolor = %s; shape = record; \
@@ -10225,7 +10229,7 @@ static enum ggml_opt_result ggml_opt_adam(
10225
10229
if (params .past <= t ) {
10226
10230
const float rate = (pf [t %params .past ] - fx )/fx ;
10227
10231
10228
- if (fabs (rate ) < params .delta ) {
10232
+ if (fabsf (rate ) < params .delta ) {
10229
10233
return GGML_OPT_OK ;
10230
10234
}
10231
10235
}
@@ -10304,7 +10308,7 @@ static enum ggml_opt_result linesearch_backtracking(
10304
10308
const float dec = 0.5f ;
10305
10309
const float inc = 2.1f ;
10306
10310
10307
- if (* step <= 0. ) {
10311
+ if (* step <= 0.f ) {
10308
10312
return GGML_LINESEARCH_INVALID_PARAMETERS ;
10309
10313
}
10310
10314
@@ -10392,7 +10396,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
10392
10396
struct ggml_cgraph * gb ) {
10393
10397
if (params .lbfgs .linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE ||
10394
10398
params .lbfgs .linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE ) {
10395
- if (params .lbfgs .wolfe <= params .lbfgs .ftol || 1. <= params .lbfgs .wolfe ) {
10399
+ if (params .lbfgs .wolfe <= params .lbfgs .ftol || 1.f <= params .lbfgs .wolfe ) {
10396
10400
return GGML_OPT_INVALID_WOLFE ;
10397
10401
}
10398
10402
}
@@ -10513,8 +10517,8 @@ static enum ggml_opt_result ggml_opt_lbfgs(
10513
10517
10514
10518
GGML_PRINT_DEBUG ("f = %10.6f\n" , ggml_get_f32_1d (f , 0 ));
10515
10519
10516
- if (xnorm < 1.0 ) {
10517
- xnorm = 1.0 ;
10520
+ if (xnorm < 1.0f ) {
10521
+ xnorm = 1.0f ;
10518
10522
}
10519
10523
if (gnorm /xnorm <= params .lbfgs .eps ) {
10520
10524
// converged
@@ -10527,7 +10531,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
10527
10531
if (params .past <= k ) {
10528
10532
const float rate = (pf [k %params .past ] - fx )/fx ;
10529
10533
10530
- if (fabs (rate ) < params .delta ) {
10534
+ if (fabsf (rate ) < params .delta ) {
10531
10535
return GGML_OPT_OK ;
10532
10536
}
10533
10537
}
0 commit comments