@@ -16441,14 +16441,15 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
16441
16441
else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q4_K;
16442
16442
else new_type = GGML_TYPE_IQ4_XS;
16443
16443
}
16444
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
16444
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS ||
16445
+ ftype == LLAMA_FTYPE_MOSTLY_IQ2_S) {
16445
16446
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
16446
- else if (qs.model.hparams.n_head <= 20) new_type = GGML_TYPE_IQ4_XS;
16447
+ else if (qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_IQ4_XS;
16447
16448
else new_type = GGML_TYPE_Q4_K;
16448
16449
}
16449
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
16450
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
16450
16451
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
16451
- else if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head <= 20 ) new_type = GGML_TYPE_Q4_K;
16452
+ else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q4_K;
16452
16453
else new_type = GGML_TYPE_Q5_K;
16453
16454
}
16454
16455
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
@@ -16457,7 +16458,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
16457
16458
}
16458
16459
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
16459
16460
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
16460
- else if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head <= 20) new_type = GGML_TYPE_Q5_K;
16461
+ else if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_Q5_K;
16461
16462
else new_type = GGML_TYPE_Q6_K;
16462
16463
}
16463
16464
else if (new_type != GGML_TYPE_Q8_0) {
@@ -16488,18 +16489,18 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
16488
16489
new_type = GGML_TYPE_IQ2_S;
16489
16490
}
16490
16491
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
16491
- if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head <= 20) new_type = GGML_TYPE_IQ2_S;
16492
+ if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_IQ2_S;
16492
16493
else new_type = GGML_TYPE_IQ3_XXS;
16493
16494
}
16494
16495
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
16495
16496
new_type = GGML_TYPE_IQ3_XXS;
16496
16497
}
16497
- else if ( || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
16498
- if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head <= 20) new_type = GGML_TYPE_IQ3_XXS;
16498
+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
16499
+ if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_IQ3_XXS;
16499
16500
else new_type = GGML_TYPE_IQ3_S;
16500
16501
}
16501
16502
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL) {
16502
- if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head <= 20) new_type = GGML_TYPE_IQ3_S;
16503
+ if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_IQ3_S;
16503
16504
new_type = GGML_TYPE_IQ4_XS;
16504
16505
}
16505
16506
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) new_type = GGML_TYPE_IQ3_XXS;
0 commit comments