From e67b15f5c2c010b49b27f01622c859c239d6865d Mon Sep 17 00:00:00 2001
From: Howard Su
Date: Mon, 26 Jun 2023 22:58:22 +0800
Subject: [PATCH 1/5] Use unsigned for random seed

---
 examples/common.h                                        | 2 +-
 examples/embedding/embedding.cpp                         | 4 ++--
 examples/main/README.md                                  | 2 +-
 examples/main/main.cpp                                   | 4 ++--
 examples/perplexity/perplexity.cpp                       | 4 ++--
 examples/server/README.md                                | 2 +-
 .../train-text-from-scratch/train-text-from-scratch.cpp | 6 +++---
 llama.cpp                                                | 6 +++---
 llama.h                                                  | 4 ++--
 9 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/examples/common.h b/examples/common.h
index 9d213d6d04149..66e5672917996 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -22,7 +22,7 @@ int32_t get_num_physical_cores();
 
 struct gpt_params {
-    int32_t seed      = -1;  // RNG seed
+    uint32_t seed     = -1;  // RNG seed
     int32_t n_threads = get_num_physical_cores();
     int32_t n_predict = -1;  // new tokens to predict
     int32_t n_ctx     = 512; // context size
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 3cd5bb794957c..dd590a7d22da6 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -24,11 +24,11 @@ int main(int argc, char ** argv) {
 
     fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
 
-    if (params.seed < 0) {
+    if (params.seed == -1) {
         params.seed = time(NULL);
     }
 
-    fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
+    fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
 
     std::mt19937 rng(params.seed);
     if (params.random_prompt) {
diff --git a/examples/main/README.md b/examples/main/README.md
index 9ba1eb3849b2e..37538613042b0 100644
--- a/examples/main/README.md
+++ b/examples/main/README.md
@@ -242,7 +242,7 @@ Example usage: `--logit-bias 29905-inf`
 
 ### RNG Seed
 
-- `-s SEED, --seed SEED`: Set the random number generator (RNG) seed (default: -1, < 0 = random seed).
+- `-s SEED, --seed SEED`: Set the random number generator (RNG) seed (default: -1, -1 = random seed).
 
 The RNG seed is used to initialize the random number generator that influences the text generation process. By setting a specific seed value, you can obtain consistent and reproducible results across multiple runs with the same input and settings. This can be helpful for testing, debugging, or comparing the effects of different options on the generated text to see when they diverge. If the seed is set to a value less than 0, a random seed will be used, which will result in different outputs on each run.
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index bcdc98d611250..5439bd1786d31 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -94,11 +94,11 @@ int main(int argc, char ** argv) {
 
     fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
 
-    if (params.seed < 0) {
+    if (params.seed == -1) {
         params.seed = time(NULL);
     }
 
-    fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
+    fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
 
     std::mt19937 rng(params.seed);
     if (params.random_prompt) {
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index f8a6cb516d067..0782a28472d0d 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -136,11 +136,11 @@ int main(int argc, char ** argv) {
 
     fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
 
-    if (params.seed < 0) {
+    if (params.seed == -1) {
         params.seed = time(NULL);
     }
 
-    fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
+    fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
 
     std::mt19937 rng(params.seed);
     if (params.random_prompt) {
diff --git a/examples/server/README.md b/examples/server/README.md
index fa95c00441bc2..ba4b2fec9d1df 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -152,7 +152,7 @@ node .
 
     `mirostat_eta`: Set the Mirostat learning rate, parameter eta (default: 0.1).
 
-    `seed`: Set the random number generator (RNG) seed (default: -1, < 0 = random seed).
+    `seed`: Set the random number generator (RNG) seed (default: -1, -1 = random seed).
 
     `ignore_eos`: Ignore end of stream token and continue generating (default: false).
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index a05881d1640e7..0263054318aa5 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -2768,7 +2768,7 @@ void train_print_usage(int /*argc*/, char ** argv, const struct train_params * params) {
     fprintf(stderr, "  --checkpoint-in FNAME    path from which to load training checkpoint (default '%s')\n", params->fn_checkpoint_in);
     fprintf(stderr, "  --checkpoint-out FNAME   path to save training checkpoint (default '%s')\n", params->fn_checkpoint_out);
     fprintf(stderr, "  --model-out FNAME        path to save ggml model (default '%s')\n", params->fn_model_out);
-    fprintf(stderr, "  -s SEED, --seed SEED     RNG seed (default: -1, use random seed for < 0)\n");
+    fprintf(stderr, "  -s SEED, --seed SEED     RNG seed (default: -1, use random seed for -1)\n");
     fprintf(stderr, "  -c N, --ctx N            Context size used during training (default %d)\n", params->n_ctx);
     fprintf(stderr, "  --embd N                 Embedding size used for new models (default %d)\n", params->n_embd);
     fprintf(stderr, "  --mult N                 Mult size used for new models, influences feedforward size. (default %d)\n", params->n_mult);
@@ -3034,10 +3034,10 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
-    if (params.seed < 0) {
+    if (params.seed == -1) {
         params.seed = time(NULL);
     }
-    printf("%s: seed: %d\n", __func__, params.seed);
+    printf("%s: seed: %u\n", __func__, params.seed);
     srand(params.seed);
 
     struct llama_context_params llama_params = llama_context_default_params();
diff --git a/llama.cpp b/llama.cpp
index 2482bdd18d2e7..232df6b4e8cff 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -938,7 +938,7 @@ static bool kv_cache_init(
 
 struct llama_context_params llama_context_default_params() {
     struct llama_context_params result = {
-        /*.seed       =*/ -1,
+        /*.seed       =*/ (unsigned int)-1,
         /*.n_ctx      =*/ 512,
         /*.n_batch    =*/ 512,
         /*.gpu_layers =*/ 0,
@@ -3091,8 +3091,8 @@ int llama_get_kv_cache_token_count(const struct llama_context * ctx) {
 
 #define LLAMA_MAX_RNG_STATE (64*1024)
 
-void llama_set_rng_seed(struct llama_context * ctx, int seed) {
-    if (seed < 0) {
+void llama_set_rng_seed(struct llama_context * ctx, unsigned int seed) {
+    if (seed == -1) {
         seed = time(NULL);
     }
     ctx->rng.seed(seed);
diff --git a/llama.h b/llama.h
index 76239be25fc22..b5cdc4b173888 100644
--- a/llama.h
+++ b/llama.h
@@ -81,7 +81,7 @@ extern "C" {
     typedef void (*llama_progress_callback)(float progress, void *ctx);
 
     struct llama_context_params {
-        int seed;          // RNG seed, -1 for random
+        unsigned int seed; // RNG seed, -1 for random
        int n_ctx;         // text context
        int n_batch;       // prompt processing batch size
        int n_gpu_layers;  // number of layers to store in VRAM
@@ -196,7 +196,7 @@ extern "C" {
     LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx);
 
     // Sets the current rng seed.
-    LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, int seed);
+    LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, unsigned int seed);
 
     // Returns the maximum size in bytes of the state (rng, logits, embedding
     // and kv_cache) - will often be smaller after compacting tokens

From 74fe5fc1ea45b37d325b9f815756969f9ad8ef68 Mon Sep 17 00:00:00 2001
From: Howard Su
Date: Tue, 27 Jun 2023 08:17:35 +0800
Subject: [PATCH 2/5] Change according to the review

---
 examples/common.cpp                            |  2 +-
 examples/embedding/embedding.cpp               |  2 +-
 examples/main/main.cpp                         |  2 +-
 examples/perplexity/perplexity.cpp             |  2 +-
 .../train-text-from-scratch.cpp                |  2 +-
 llama.cpp                                      |  4 ++--
 llama.h                                        | 12 +++++++-----
 7 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/examples/common.cpp b/examples/common.cpp
index 0023027341e5f..534ca84ceb9a6 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -110,7 +110,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 invalid_param = true;
                 break;
             }
-            params.seed = std::stoi(argv[i]);
+            params.seed = std::stoul(argv[i]);
         } else if (arg == "-t" || arg == "--threads") {
             if (++i >= argc) {
                 invalid_param = true;
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index dd590a7d22da6..2b7eb39c51ff5 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -24,7 +24,7 @@ int main(int argc, char ** argv) {
 
     fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
 
-    if (params.seed == -1) {
+    if (params.seed == LLAMA_DEFAULT_SEED) {
         params.seed = time(NULL);
     }
 
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 5439bd1786d31..3a171925ba510 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -94,7 +94,7 @@ int main(int argc, char ** argv) {
 
     fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
 
-    if (params.seed == -1) {
+    if (params.seed == LLAMA_DEFAULT_SEED) {
         params.seed = time(NULL);
     }
 
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 0782a28472d0d..dd54ed3c4bd6c 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -136,7 +136,7 @@ int main(int argc, char ** argv) {
 
     fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
 
-    if (params.seed == -1) {
+    if (params.seed == LLAMA_DEFAULT_SEED) {
         params.seed = time(NULL);
     }
 
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 0263054318aa5..05bfa80167968 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -3034,7 +3034,7 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
-    if (params.seed == -1) {
+    if (params.seed == LLAMA_DEFAULT_SEED) {
         params.seed = time(NULL);
     }
     printf("%s: seed: %u\n", __func__, params.seed);
diff --git a/llama.cpp b/llama.cpp
index 232df6b4e8cff..b5c9c18d5efd2 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -938,7 +938,7 @@ static bool kv_cache_init(
 
 struct llama_context_params llama_context_default_params() {
     struct llama_context_params result = {
-        /*.seed       =*/ (unsigned int)-1,
+        /*.seed       =*/ LLAMA_DEFAULT_SEED,
         /*.n_ctx      =*/ 512,
         /*.n_batch    =*/ 512,
         /*.gpu_layers =*/ 0,
@@ -2692,7 +2692,7 @@ struct llama_context * llama_new_context_with_model(
 
     llama_context * ctx = new llama_context(*model, model->vocab);
 
-    if (params.seed < 0) {
+    if (params.seed == LLAMA_DEFAULT_SEED) {
         params.seed = time(NULL);
     }
 
diff --git a/llama.h b/llama.h
index b5cdc4b173888..90a219abb8e54 100644
--- a/llama.h
+++ b/llama.h
@@ -46,6 +46,8 @@
 #define LLAMA_SESSION_MAGIC   LLAMA_FILE_MAGIC_GGSN
 #define LLAMA_SESSION_VERSION 1
 
+#define LLAMA_DEFAULT_SEED 0xFFFFFFFF
+
 #if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
 // Defined when llama.cpp is compiled with support for offloading model layers to GPU.
 #define LLAMA_SUPPORTS_GPU_OFFLOAD
@@ -81,11 +83,11 @@ extern "C" {
     typedef void (*llama_progress_callback)(float progress, void *ctx);
 
     struct llama_context_params {
-        unsigned int seed; // RNG seed, -1 for random
-        int n_ctx;         // text context
-        int n_batch;       // prompt processing batch size
-        int n_gpu_layers;  // number of layers to store in VRAM
-        int main_gpu;      // the GPU that is used for scratch and small tensors
+        uint32_t seed;         // RNG seed, -1 for random
+        int32_t  n_ctx;        // text context
+        int32_t  n_batch;      // prompt processing batch size
+        int32_t  n_gpu_layers; // number of layers to store in VRAM
+        int32_t  main_gpu;     // the GPU that is used for scratch and small tensors
         float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
         // called with a progress value between 0 and 1, pass NULL to disable
         llama_progress_callback progress_callback;

From 5a16205274b0b25c3d7a4e1221cd2841e19fa61b Mon Sep 17 00:00:00 2001
From: Howard Su
Date: Tue, 27 Jun 2023 19:48:07 +0800
Subject: [PATCH 3/5] Missing one place to replace -1 with default seed constant

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index b5c9c18d5efd2..94ec3807e7bea 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3092,7 +3092,7 @@ int llama_get_kv_cache_token_count(const struct llama_context * ctx) {
 #define LLAMA_MAX_RNG_STATE (64*1024)
 
 void llama_set_rng_seed(struct llama_context * ctx, unsigned int seed) {
-    if (seed == -1) {
+    if (seed == LLAMA_DEFAULT_SEED) {
         seed = time(NULL);
     }
     ctx->rng.seed(seed);

From d7d454f227368431df7de4439baadd0029b8a87f Mon Sep 17 00:00:00 2001
From: Howard Su
Date: Wed, 28 Jun 2023 15:51:56 -0700
Subject: [PATCH 4/5] Use uint32_t for seed

Co-authored-by: Georgi Gerganov
---
 llama.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.h b/llama.h
index 90a219abb8e54..efc8efe86019c 100644
--- a/llama.h
+++ b/llama.h
@@ -198,7 +198,7 @@ extern "C" {
     LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx);
 
     // Sets the current rng seed.
-    LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, unsigned int seed);
+    LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, uint32_t seed);
 
     // Returns the maximum size in bytes of the state (rng, logits, embedding
     // and kv_cache) - will often be smaller after compacting tokens

From 7c6121eb6496137ef7a64b80d8155c1a93aae7e0 Mon Sep 17 00:00:00 2001
From: Howard Su
Date: Wed, 28 Jun 2023 15:52:23 -0700
Subject: [PATCH 5/5] use uint32_t for seed

Co-authored-by: Georgi Gerganov
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 94ec3807e7bea..e147244483320 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3091,7 +3091,7 @@ int llama_get_kv_cache_token_count(const struct llama_context * ctx) {
 
 #define LLAMA_MAX_RNG_STATE (64*1024)
 
-void llama_set_rng_seed(struct llama_context * ctx, unsigned int seed) {
+void llama_set_rng_seed(struct llama_context * ctx, uint32_t seed) {
     if (seed == LLAMA_DEFAULT_SEED) {
         seed = time(NULL);
     }
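
Appendix (not part of the patch series): a minimal standalone C++ sketch of the seed semantics the five patches converge on. The LLAMA_DEFAULT_SEED value is copied from the llama.h hunk above; the demo program around it is illustrative only, not llama.cpp code.

// Sketch of the unsigned seed plumbing end to end, under the assumptions above.
#include <cstdint>
#include <cstdio>
#include <ctime>
#include <random>
#include <string>

#define LLAMA_DEFAULT_SEED 0xFFFFFFFF // sentinel introduced in PATCH 2/5

int main(int argc, char ** argv) {
    // std::stoul mirrors the parsing change in examples/common.cpp:
    // stoul("-1") yields ULONG_MAX, which narrows to 0xFFFFFFFF in a
    // uint32_t -- exactly LLAMA_DEFAULT_SEED -- so `-s -1` still selects
    // a random seed even though the field is now unsigned.
    uint32_t seed = (argc > 1) ? (uint32_t) std::stoul(argv[1])
                               : LLAMA_DEFAULT_SEED;
    if (seed == LLAMA_DEFAULT_SEED) {
        seed = (uint32_t) time(NULL); // same fallback the patches use
    }
    fprintf(stderr, "seed = %u\n", seed); // %u matches the unsigned type
    std::mt19937 rng(seed);               // the examples seed their RNG this way
    printf("first draw: %u\n", (unsigned) rng());
    return 0;
}

The design point this illustrates: 0xFFFFFFFF doubles as the "pick a random seed" sentinel, so existing `-s -1` invocations keep their meaning after the signed-to-unsigned switch.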