ggml-org
diff --git a/.devops/llama-server-cuda.Dockerfile
Lines changed: 2 additions & 0 deletions b/.devops/llama-server-cuda.Dockerfile
Lines changed: 2 additions & 0 deletions
diff --git a/.devops/llama-server-intel.Dockerfile
Lines changed: 2 additions & 0 deletions b/.devops/llama-server-intel.Dockerfile
Lines changed: 2 additions & 0 deletions
diff --git a/.devops/llama-server-rocm.Dockerfile
Lines changed: 2 additions & 0 deletions b/.devops/llama-server-rocm.Dockerfile
Lines changed: 2 additions & 0 deletions
diff --git a/.devops/llama-server-vulkan.Dockerfile
Lines changed: 2 additions & 0 deletions b/.devops/llama-server-vulkan.Dockerfile
Lines changed: 2 additions & 0 deletions
diff --git a/.devops/llama-server.Dockerfile
Lines changed: 2 additions & 0 deletions b/.devops/llama-server.Dockerfile
Lines changed: 2 additions & 0 deletions
diff --git a/.ecrc
Lines changed: 1 addition & 1 deletion b/.ecrc
Lines changed: 1 addition & 1 deletion
diff --git a/common/common.cpp
Lines changed: 7 additions & 0 deletions b/common/common.cpp
Lines changed: 7 additions & 0 deletions
@@ -24,6 +24,8 @@ ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
 ENV GGML_CUDA=1
 # Enable cURL
 ENV LLAMA_CURL=1
+# Must be set to 0.0.0.0 so the server can accept requests from the host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
 
 RUN make -j$(nproc) llama-server
 
 
@@ -26,6 +26,8 @@ RUN apt-get update && \
 COPY --from=build /app/build/bin/llama-server /llama-server
 
 ENV LC_ALL=C.utf8
+# Must be set to 0.0.0.0 so the server can accept requests from the host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
 
 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
 
 
@@ -39,6 +39,8 @@ ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
 ENV GGML_HIPBLAS=1
 ENV CC=/opt/rocm/llvm/bin/clang
 ENV CXX=/opt/rocm/llvm/bin/clang++
+# Must be set to 0.0.0.0 so the server can accept requests from the host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
 
 # Enable cURL
 ENV LLAMA_CURL=1
 
@@ -23,6 +23,8 @@ RUN cp /app/build/bin/llama-server /llama-server && \
     rm -rf /app
 
 ENV LC_ALL=C.utf8
+# Must be set to 0.0.0.0 so the server can accept requests from the host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
 
 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
 
 
@@ -21,6 +21,8 @@ RUN apt-get update && \
 COPY --from=build /app/llama-server /llama-server
 
 ENV LC_ALL=C.utf8
+# Must be set to 0.0.0.0 so the server can accept requests from the host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
 
 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
 
 
@@ -1,5 +1,5 @@
 {
-  "Exclude": ["^\\.gitmodules$"],
+  "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
   "Disable": {
     "IndentSize": true
   }
 
@@ -327,6 +327,10 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
 void gpt_params_parse_from_env(gpt_params & params) {
     // we only care about server-related params for now
     get_env("LLAMA_ARG_MODEL",            params.model);
+    get_env("LLAMA_ARG_MODEL_URL",        params.model_url);
+    get_env("LLAMA_ARG_MODEL_ALIAS",      params.model_alias);
+    get_env("LLAMA_ARG_HF_REPO",          params.hf_repo);
+    get_env("LLAMA_ARG_HF_FILE",          params.hf_file);
     get_env("LLAMA_ARG_THREADS",          params.n_threads);
     get_env("LLAMA_ARG_CTX_SIZE",         params.n_ctx);
     get_env("LLAMA_ARG_N_PARALLEL",       params.n_parallel);
@@ -341,6 +345,9 @@ void gpt_params_parse_from_env(gpt_params & params) {
     get_env("LLAMA_ARG_EMBEDDINGS",       params.embedding);
     get_env("LLAMA_ARG_FLASH_ATTN",       params.flash_attn);
     get_env("LLAMA_ARG_DEFRAG_THOLD",     params.defrag_thold);
+    get_env("LLAMA_ARG_CONT_BATCHING",    params.cont_batching);
+    get_env("LLAMA_ARG_HOST",             params.hostname);
+    get_env("LLAMA_ARG_PORT",             params.port);
 }
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`{`
`2`		`- "Exclude": ["^\\.gitmodules$"],`
	`2`	`+ "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],`
`3`	`3`	`"Disable": {`
`4`	`4`	`"IndentSize": true`
`5`	`5`	`}`