From 3732ad9c227125986e6d1efc26b826154120e65d Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Wed, 10 Apr 2024 23:05:14 +0100 Subject: [PATCH 1/6] grammars: reserve rejects & next candidates --- llama.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llama.cpp b/llama.cpp index b6e2ade9134d9..a11bf78dca8bd 100644 --- a/llama.cpp +++ b/llama.cpp @@ -11951,6 +11951,7 @@ static std::vector llama_grammar_reject_candidates_for_ const std::vector & candidates) { std::vector rejects; + rejects.reserve(candidates.size()); if (stack.empty()) { for (const auto & tok : candidates) { @@ -11964,6 +11965,8 @@ static std::vector llama_grammar_reject_candidates_for_ const llama_grammar_element * stack_pos = stack.back(); std::vector next_candidates; + next_candidates.reserve(candidates.size()); + for (const auto & tok : candidates) { if (*tok.code_points == 0) { // reached end of full codepoints in token, reject iff it ended in a partial sequence From 47e37dd955879d27d4037222bdf18cec123a900b Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Thu, 11 Apr 2024 15:11:40 +0100 Subject: [PATCH 2/6] grammars: reuse new_stacks --- llama.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/llama.cpp b/llama.cpp index a11bf78dca8bd..13a6807fc14c5 100644 --- a/llama.cpp +++ b/llama.cpp @@ -11912,12 +11912,13 @@ static void llama_grammar_advance_stack( // be positioned at a character range (see `llama_grammar_advance_stack`), and // produces the N possible stacks if the given char is accepted at those // positions -std::vector> llama_grammar_accept( +void llama_grammar_accept( const std::vector> & rules, const std::vector> & stacks, - const uint32_t chr) { + const uint32_t chr, + std::vector> & new_stacks) { - std::vector> new_stacks; + new_stacks.clear(); for (const auto & stack : stacks) { if (stack.empty()) { @@ -11936,8 +11937,6 @@ std::vector> llama_grammar_accept( llama_grammar_advance_stack(rules, new_stack, new_stacks); } } - - return new_stacks; } static std::vector llama_grammar_reject_candidates( @@ -12774,8 +12773,10 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar // Note terminating 0 in decoded string const auto decoded = decode_utf8(piece, grammar->partial_utf8); const auto & code_points = decoded.first; + std::vector> tmp_new_stacks; for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) { - grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it); + llama_grammar_accept(grammar->rules, grammar->stacks, *it, tmp_new_stacks); + tmp_new_stacks.swap(grammar->stacks); } grammar->partial_utf8 = decoded.second; GGML_ASSERT(!grammar->stacks.empty()); From 763b41e2aa47e4168077984ac9fc75d39fa9e2b3 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Thu, 11 Apr 2024 15:47:00 +0100 Subject: [PATCH 3/6] grammars: fix missing sig change in llama.h --- llama.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llama.h b/llama.h index b770a275ff02f..b5da686f7b7e5 100644 --- a/llama.h +++ b/llama.h @@ -1097,10 +1097,11 @@ const std::vector> & llama_internal struct llama_context * ctx ); -std::vector> llama_grammar_accept( +void llama_grammar_accept( const std::vector> & rules, const std::vector> & stacks, - const uint32_t chr); + const uint32_t chr, + std::vector> & new_stacks); std::pair, llama_partial_utf8> decode_utf8( const std::string & src, From db787a4489d8bdc238a3c8896f8b82cacc5e2e37 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Thu, 11 Apr 2024 16:20:41 +0100 Subject: [PATCH 4/6] grammars: fix test (api changed) --- tests/test-grammar-integration.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 0a9c3b6f5f7c3..2d8f228e3769d 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -38,7 +38,7 @@ number ::= [0-9]+)"""; for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) { auto prev_stacks = grammar->stacks; - grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it); + llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks); assert(!grammar->stacks.empty()); } @@ -138,7 +138,7 @@ ws ::= [ \t\n\r]?)"""; for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) { ++pos; auto prev_stacks = grammar->stacks; - grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it); + llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks); // Expect that each code point will not cause the grammar to fail if (grammar->stacks.empty()) { @@ -173,7 +173,7 @@ ws ::= [ \t\n\r]?)"""; for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) { auto prev_stacks = grammar->stacks; - grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it); + llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks); if (grammar->stacks.empty()) { parse_failed = true; break; From cb77a8db1d9a50a3d50e1385669d67cc232e0c5f Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Thu, 11 Apr 2024 16:47:19 +0100 Subject: [PATCH 5/6] grammars: update gbnf-validator.cpp --- examples/gbnf-validator/gbnf-validator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/gbnf-validator/gbnf-validator.cpp b/examples/gbnf-validator/gbnf-validator.cpp index e4c0c1689c7a4..091069ffa699c 100644 --- a/examples/gbnf-validator/gbnf-validator.cpp +++ b/examples/gbnf-validator/gbnf-validator.cpp @@ -17,7 +17,7 @@ static bool llama_sample_grammar_string(struct llama_grammar * grammar, const st size_t pos = 0; for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) { auto prev_stacks = grammar->stacks; - grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it); + llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks); if (grammar->stacks.empty()) { error_pos = pos; error_msg = "Unexpected character '" + unicode_cpt_to_utf8(*it) + "'"; From 1e0f466920dbd6747852db864118266e6f256700 Mon Sep 17 00:00:00 2001 From: Olivier Chafik Date: Thu, 11 Apr 2024 18:51:19 +0100 Subject: [PATCH 6/6] grammars: simpler syntax (no swap) --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index 13a6807fc14c5..ad07059c4533a 100644 --- a/llama.cpp +++ b/llama.cpp @@ -12776,7 +12776,7 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar std::vector> tmp_new_stacks; for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) { llama_grammar_accept(grammar->rules, grammar->stacks, *it, tmp_new_stacks); - tmp_new_stacks.swap(grammar->stacks); + grammar->stacks = tmp_new_stacks; } grammar->partial_utf8 = decoded.second; GGML_ASSERT(!grammar->stacks.empty());