From 3732ad9c227125986e6d1efc26b826154120e65d Mon Sep 17 00:00:00 2001
From: Olivier Chafik <ochafik@google.com>
Date: Wed, 10 Apr 2024 23:05:14 +0100
Subject: [PATCH 1/6] grammars: reserve rejects & next candidates

---
 llama.cpp | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/llama.cpp b/llama.cpp
index b6e2ade9134d9..a11bf78dca8bd 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -11951,6 +11951,7 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
         const std::vector<llama_grammar_candidate>            & candidates) {
 
     std::vector<llama_grammar_candidate> rejects;
+    rejects.reserve(candidates.size());
 
     if (stack.empty()) {
         for (const auto & tok : candidates) {
@@ -11964,6 +11965,8 @@ static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates_for_
     const llama_grammar_element * stack_pos = stack.back();
 
     std::vector<llama_grammar_candidate> next_candidates;
+    next_candidates.reserve(candidates.size());
+
     for (const auto & tok : candidates) {
         if (*tok.code_points == 0) {
             // reached end of full codepoints in token, reject iff it ended in a partial sequence

From 47e37dd955879d27d4037222bdf18cec123a900b Mon Sep 17 00:00:00 2001
From: Olivier Chafik <ochafik@google.com>
Date: Thu, 11 Apr 2024 15:11:40 +0100
Subject: [PATCH 2/6] grammars: reuse new_stacks

---
 llama.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index a11bf78dca8bd..13a6807fc14c5 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -11912,12 +11912,13 @@ static void llama_grammar_advance_stack(
 // be positioned at a character range (see `llama_grammar_advance_stack`), and
 // produces the N possible stacks if the given char is accepted at those
 // positions
-std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
+void llama_grammar_accept(
         const std::vector<std::vector<llama_grammar_element>>         & rules,
         const std::vector<std::vector<const llama_grammar_element *>> & stacks,
-        const uint32_t                                                  chr) {
+        const uint32_t                                                  chr,
+        std::vector<std::vector<const llama_grammar_element *>>       & new_stacks) {
 
-    std::vector<std::vector<const llama_grammar_element *>> new_stacks;
+    new_stacks.clear();
 
     for (const auto & stack : stacks) {
         if (stack.empty()) {
@@ -11936,8 +11937,6 @@ std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
             llama_grammar_advance_stack(rules, new_stack, new_stacks);
         }
     }
-
-    return new_stacks;
 }
 
 static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates(
@@ -12774,8 +12773,10 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
     // Note terminating 0 in decoded string
     const auto   decoded     = decode_utf8(piece, grammar->partial_utf8);
     const auto & code_points = decoded.first;
+    std::vector<std::vector<const llama_grammar_element *>> tmp_new_stacks;
     for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
-        grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
+        llama_grammar_accept(grammar->rules, grammar->stacks, *it, tmp_new_stacks);
+        tmp_new_stacks.swap(grammar->stacks);
     }
     grammar->partial_utf8 = decoded.second;
     GGML_ASSERT(!grammar->stacks.empty());

From 763b41e2aa47e4168077984ac9fc75d39fa9e2b3 Mon Sep 17 00:00:00 2001
From: Olivier Chafik <ochafik@google.com>
Date: Thu, 11 Apr 2024 15:47:00 +0100
Subject: [PATCH 3/6] grammars: fix missing signature change in llama.h

---
 llama.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llama.h b/llama.h
index b770a275ff02f..b5da686f7b7e5 100644
--- a/llama.h
+++ b/llama.h
@@ -1097,10 +1097,11 @@ const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal
     struct llama_context * ctx
 );
 
-std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
+void llama_grammar_accept(
         const std::vector<std::vector<llama_grammar_element>>         & rules,
         const std::vector<std::vector<const llama_grammar_element *>> & stacks,
-        const uint32_t                                                  chr);
+        const uint32_t                                                  chr,
+        std::vector<std::vector<const llama_grammar_element *>>       & new_stacks);
 
 std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
         const std::string & src,

From db787a4489d8bdc238a3c8896f8b82cacc5e2e37 Mon Sep 17 00:00:00 2001
From: Olivier Chafik <ochafik@google.com>
Date: Thu, 11 Apr 2024 16:20:41 +0100
Subject: [PATCH 4/6] grammars: fix test (API changed)

---
 tests/test-grammar-integration.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp
index 0a9c3b6f5f7c3..2d8f228e3769d 100644
--- a/tests/test-grammar-integration.cpp
+++ b/tests/test-grammar-integration.cpp
@@ -38,7 +38,7 @@ number ::= [0-9]+)""";
 
     for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
         auto prev_stacks = grammar->stacks;
-        grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
+        llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
         assert(!grammar->stacks.empty());
     }
 
@@ -138,7 +138,7 @@ ws ::= [ \t\n\r]?)""";
         for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
             ++pos;
             auto prev_stacks = grammar->stacks;
-            grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
+            llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
 
             // Expect that each code point will not cause the grammar to fail
             if (grammar->stacks.empty()) {
@@ -173,7 +173,7 @@ ws ::= [ \t\n\r]?)""";
 
         for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
             auto prev_stacks = grammar->stacks;
-            grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
+            llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
             if (grammar->stacks.empty()) {
                 parse_failed = true;
                 break;

From cb77a8db1d9a50a3d50e1385669d67cc232e0c5f Mon Sep 17 00:00:00 2001
From: Olivier Chafik <ochafik@google.com>
Date: Thu, 11 Apr 2024 16:47:19 +0100
Subject: [PATCH 5/6] grammars: update gbnf-validator.cpp

---
 examples/gbnf-validator/gbnf-validator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/gbnf-validator/gbnf-validator.cpp b/examples/gbnf-validator/gbnf-validator.cpp
index e4c0c1689c7a4..091069ffa699c 100644
--- a/examples/gbnf-validator/gbnf-validator.cpp
+++ b/examples/gbnf-validator/gbnf-validator.cpp
@@ -17,7 +17,7 @@ static bool llama_sample_grammar_string(struct llama_grammar * grammar, const st
     size_t pos = 0;
     for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
         auto prev_stacks = grammar->stacks;
-        grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
+        llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
         if (grammar->stacks.empty()) {
             error_pos = pos;
             error_msg = "Unexpected character '" + unicode_cpt_to_utf8(*it) + "'";

From 1e0f466920dbd6747852db864118266e6f256700 Mon Sep 17 00:00:00 2001
From: Olivier Chafik <ochafik@google.com>
Date: Thu, 11 Apr 2024 18:51:19 +0100
Subject: [PATCH 6/6] grammars: simpler syntax (no swap)

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 13a6807fc14c5..ad07059c4533a 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -12776,7 +12776,7 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
     std::vector<std::vector<const llama_grammar_element *>> tmp_new_stacks;
     for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
         llama_grammar_accept(grammar->rules, grammar->stacks, *it, tmp_new_stacks);
-        tmp_new_stacks.swap(grammar->stacks);
+        grammar->stacks = tmp_new_stacks;
     }
     grammar->partial_utf8 = decoded.second;
     GGML_ASSERT(!grammar->stacks.empty());