From dfeda32abdcf35b2b8f4e98ea82b35409d6465e9 Mon Sep 17 00:00:00 2001 From: vvhg1 Date: Fri, 6 Oct 2023 18:26:18 +0200 Subject: [PATCH 1/9] infill tokens correction --- examples/infill/infill.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 9ec75ce425b2a..9542d7b6cbe58 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -234,9 +234,12 @@ int main(int argc, char ** argv) { LOG("add_bos: %d\n", add_bos); std::vector embd_inp; - std::vector inp_pfx = ::llama_tokenize(ctx, params.input_prefix, add_bos); - std::vector inp_sfx = ::llama_tokenize(ctx, params.input_suffix, add_bos); + std::vector inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false); + std::vector inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false); inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(ctx)); + if (add_bos) { + inp_pfx.insert(inp_pfx.begin(), llama_token_bos(ctx)); + } inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(ctx)); embd_inp = inp_pfx; embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); From 6796e7450c3c406a7d2f10498db7083f21f8a7bb Mon Sep 17 00:00:00 2001 From: vvhg1 Date: Fri, 6 Oct 2023 18:35:50 +0200 Subject: [PATCH 2/9] server infill tokens correction --- examples/server/server.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index c53a64867336f..fa5e52e823db6 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -344,9 +344,10 @@ struct llama_server_context void loadInfill() { - auto prefix_tokens = tokenize(params.input_prefix, true); // always add BOS - auto suffix_tokens = tokenize(params.input_suffix, true); // always add BOS + auto prefix_tokens = tokenize(params.input_prefix, false); + auto suffix_tokens = tokenize(params.input_suffix, false); prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(ctx)); + 
prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(ctx)); // always add BOS prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(ctx)); prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end()); prefix_tokens.push_back(llama_token_middle(ctx)); From 377be2f39d065c352ece61c59bad79943091efcb Mon Sep 17 00:00:00 2001 From: vvhg1 Date: Fri, 6 Oct 2023 20:34:04 +0200 Subject: [PATCH 3/9] removing any leading whitespace from infill suffix and removing leading space token from suffix when params.escape --- examples/infill/infill.cpp | 5 +++++ examples/server/server.cpp | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 9542d7b6cbe58..81e037265e48f 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -235,7 +235,12 @@ int main(int argc, char ** argv) { std::vector embd_inp; std::vector inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false); + params.input_suffix.erase(0, params.input_suffix.find_first_not_of(" ")); std::vector inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false); const int space_token = 29871; + if (params.escape && inp_sfx.size() > 1 && inp_sfx[0] == space_token) { + inp_sfx.erase(inp_sfx.begin()); + } inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(ctx)); if (add_bos) { inp_pfx.insert(inp_pfx.begin(), llama_token_bos(ctx)); } diff --git a/examples/server/server.cpp b/examples/server/server.cpp index fa5e52e823db6..53c0fb800dcdf 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -344,8 +344,13 @@ struct llama_server_context void loadInfill() { + params.input_suffix.erase(0, params.input_suffix.find_first_not_of(" ")); auto prefix_tokens = tokenize(params.input_prefix, false); auto suffix_tokens = tokenize(params.input_suffix, false); + const int space_token = 29871; + if (params.escape && suffix_tokens.size() > 1 && suffix_tokens[0] == space_token) { + 
suffix_tokens.erase(suffix_tokens.begin()); + } prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(ctx)); prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(ctx)); // always add BOS prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(ctx)); From b4046aabbfe7aecd028ef0467e07077fca401c21 Mon Sep 17 00:00:00 2001 From: vvhg1 Date: Fri, 6 Oct 2023 21:53:24 +0200 Subject: [PATCH 4/9] removing any leading whitespace from infill suffix and removing leading space token from suffix when params.escape --- examples/infill/infill.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 81e037265e48f..4288ffed7cfa9 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -235,7 +235,7 @@ int main(int argc, char ** argv) { std::vector embd_inp; std::vector inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false); - params.input_suffix.erase(0, params.input_suffix.find_first_not_of(" ")); + // params.input_suffix.erase(0, params.input_suffix.find_first_not_of(" ")); std::vector inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false); const int space_token = 29871; if (params.escape && inp_sfx.size() > 1 && inp_sfx[0] == space_token) { From 0526560759f858c0d49721be29f1c8b7274210b8 Mon Sep 17 00:00:00 2001 From: vvhg1 Date: Sat, 7 Oct 2023 09:08:30 +0200 Subject: [PATCH 5/9] only rm when params.escape, rm space if possible which is added back or rm added space token --- examples/infill/infill.cpp | 8 ++++++-- examples/server/server.cpp | 9 +++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 4288ffed7cfa9..47ece81c6d624 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -233,12 +233,16 @@ int main(int argc, char ** argv) { const bool add_bos = llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM; LOG("add_bos: %d\n", add_bos); + bool 
suff_rm_leading_spc = params.escape; + if (suff_rm_leading_spc && params.input_suffix.find_first_of(" ") == 0 && params.input_suffix.size() > 1) { + params.input_suffix.erase(0, 1); + suff_rm_leading_spc = false; + } std::vector embd_inp; std::vector inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false); - // params.input_suffix.erase(0, params.input_suffix.find_first_not_of(" ")); std::vector inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false); const int space_token = 29871; - if (params.escape && inp_sfx.size() > 1 && inp_sfx[0] == space_token) { + if (suff_rm_leading_spc && inp_sfx[0] == space_token) { inp_sfx.erase(inp_sfx.begin()); } inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(ctx)); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 53c0fb800dcdf..667f5db71e49a 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -344,11 +344,16 @@ struct llama_server_context void loadInfill() { - params.input_suffix.erase(0, params.input_suffix.find_first_not_of(" ")); + bool suff_rm_leading_spc = params.escape; + if (suff_rm_leading_spc && params.input_suffix.find_first_of(" ") == 0 && params.input_suffix.size() > 1) { + params.input_suffix.erase(0, 1); + suff_rm_leading_spc = false; + } + auto prefix_tokens = tokenize(params.input_prefix, false); auto suffix_tokens = tokenize(params.input_suffix, false); const int space_token = 29871; - if (params.escape && suffix_tokens.size() > 1 && suffix_tokens[0] == space_token) { + if (suff_rm_leading_spc && suffix_tokens[0] == space_token) { suffix_tokens.erase(suffix_tokens.begin()); } prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(ctx)); From 63ba0b621f21077c0e3bc6ba6a327534123cb738 Mon Sep 17 00:00:00 2001 From: vvhg1 Date: Sat, 7 Oct 2023 09:22:36 +0200 Subject: [PATCH 6/9] only rm when params.escape, rm space if possible which is added back or rm added space token --- examples/infill/infill.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 
deletions(-) diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 47ece81c6d624..9d97090cabe45 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -234,10 +234,11 @@ int main(int argc, char ** argv) { LOG("add_bos: %d\n", add_bos); bool suff_rm_leading_spc = params.escape; - if (suff_rm_leading_spc && params.input_suffix.find_first_of(" ") == 0 && params.input_suffix.size() > 1) { - params.input_suffix.erase(0, 1); - suff_rm_leading_spc = false; - } + // if (suff_rm_leading_spc && params.input_suffix.find_first_of(" ") == 0 && params.input_suffix.size() > 1) { + // params.input_suffix.erase(0, 1); + // suff_rm_leading_spc = false; + // } + suff_rm_leading_spc = true; std::vector embd_inp; std::vector inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false); std::vector inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false); From 003c15bfc5899e2cfa5c658208ddb78bad1a39bd Mon Sep 17 00:00:00 2001 From: vvhg1 Date: Sat, 7 Oct 2023 09:22:50 +0200 Subject: [PATCH 7/9] Revert "only rm when params.escape, rm space if possible which is added back or rm added space token" This reverts commit 63ba0b621f21077c0e3bc6ba6a327534123cb738. 
--- examples/infill/infill.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 9d97090cabe45..47ece81c6d624 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -234,11 +234,10 @@ int main(int argc, char ** argv) { LOG("add_bos: %d\n", add_bos); bool suff_rm_leading_spc = params.escape; - // if (suff_rm_leading_spc && params.input_suffix.find_first_of(" ") == 0 && params.input_suffix.size() > 1) { - // params.input_suffix.erase(0, 1); - // suff_rm_leading_spc = false; - // } - suff_rm_leading_spc = true; + if (suff_rm_leading_spc && params.input_suffix.find_first_of(" ") == 0 && params.input_suffix.size() > 1) { + params.input_suffix.erase(0, 1); + suff_rm_leading_spc = false; + } std::vector embd_inp; std::vector inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false); std::vector inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false); From c3a7f848f2d0987c1fea9854831d060c90ca42ba Mon Sep 17 00:00:00 2001 From: vvhg1 Date: Sat, 7 Oct 2023 12:07:07 +0200 Subject: [PATCH 8/9] fix interactive prompt escaping and fix server infill leading space handling --- examples/infill/infill.cpp | 21 +++++++++++++++++++-- examples/server/server.cpp | 2 +- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 47ece81c6d624..d994de5e850c3 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -639,10 +639,27 @@ int main(int argc, char ** argv) { buffer.clear(); // done taking input, reset color console::set_display(console::reset); + + if (params.escape) { + //process escape sequences, for the initial prompt this is done in common.cpp when we load the params, but for the interactive mode we need to do it here + process_escapes(params.input_prefix); + process_escapes(params.input_suffix); + } + suff_rm_leading_spc = params.escape; + if (suff_rm_leading_spc && 
params.input_suffix.find_first_of(" ") == 0 && params.input_suffix.size() > 1) { + params.input_suffix.erase(0, 1); + suff_rm_leading_spc = false; + } // tokenize new prefix and suffix - std::vector inp_pfx = ::llama_tokenize(ctx, params.input_prefix, add_bos); - std::vector inp_sfx = ::llama_tokenize(ctx, params.input_suffix, add_bos); + std::vector inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false); + std::vector inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false); + if (suff_rm_leading_spc && inp_sfx[0] == space_token) { + inp_sfx.erase(inp_sfx.begin()); + } inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(ctx)); + if (add_bos) { + inp_pfx.insert(inp_pfx.begin(), llama_token_bos(ctx)); + } inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(ctx)); embd_inp = inp_pfx; embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 667f5db71e49a..dc57e55d9435e 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -344,7 +344,7 @@ struct llama_server_context void loadInfill() { - bool suff_rm_leading_spc = params.escape; + bool suff_rm_leading_spc = true; if (suff_rm_leading_spc && params.input_suffix.find_first_of(" ") == 0 && params.input_suffix.size() > 1) { params.input_suffix.erase(0, 1); suff_rm_leading_spc = false; From b1b6beff2bb5f8387312e19e34da91fcdb68d5ed Mon Sep 17 00:00:00 2001 From: vvhg1 Date: Sat, 7 Oct 2023 12:55:59 +0200 Subject: [PATCH 9/9] rm unnecessary bool check --- examples/server/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index dc57e55d9435e..8c5318c650ae8 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -345,7 +345,7 @@ struct llama_server_context void loadInfill() { bool suff_rm_leading_spc = true; - if (suff_rm_leading_spc && params.input_suffix.find_first_of(" ") == 0 && params.input_suffix.size() > 1) { + 
if (params.input_suffix.find_first_of(" ") == 0 && params.input_suffix.size() > 1) { params.input_suffix.erase(0, 1); suff_rm_leading_spc = false; }