# Patch summary (explanatory preamble placed ahead of the "diff --git" header).
#
# Target: api/run.sh (mode 100755), one hunk at "@@ -25,4 +25,4 @@".
#
# Change: the final server launch line
#     python3 -m llama_cpp.server --n_ctx ... --n_batch ...
# becomes
#     exec python3 -m llama_cpp.server --n_ctx ... --n_batch ...
# with the same arguments. With `exec`, the shell replaces its own process
# with python3 instead of starting it as a child process.
# NOTE(review): presumably this is so that signals sent to the script's PID
# (e.g. by a container runtime) reach the server directly - confirm against
# the commit message.
#
# The "\ No newline at end of file" marker follows only the removed line, so
# the old file did not end with a newline and the new one does.
#
# NOTE(review): $n_ctx, $n_threads, $n_gpu_layers and $n_batch are expanded
# unquoted on both the removed and the added line; quoting them would be a
# separate change and is deliberately not part of this patch.
# NOTE(review): in this copy the patch's line breaks appear to have been
# collapsed into a single line; the original line structure would need to be
# restored before a patch tool could apply it - TODO confirm against the
# upstream commit.
diff --git a/api/run.sh b/api/run.sh index 3faf55c..38d8118 100755 --- a/api/run.sh +++ b/api/run.sh @@ -25,4 +25,4 @@ echo "Number of CPU threads: $n_threads" echo "Number of GPU layers: $n_gpu_layers" echo "Context window: $n_ctx" -python3 -m llama_cpp.server --n_ctx $n_ctx --n_threads $n_threads --n_gpu_layers $n_gpu_layers --n_batch $n_batch \ No newline at end of file +exec python3 -m llama_cpp.server --n_ctx $n_ctx --n_threads $n_threads --n_gpu_layers $n_gpu_layers --n_batch $n_batch