#!/bin/bash
# Build the project and launch the llama_cpp.server OpenAI-compatible server,
# auto-tuning CPU threads and batch size from this machine's hardware.
set -euo pipefail

make build

# Number of available hardware threads (one "processor" entry per thread).
n_threads=$(grep -c ^processor /proc/cpuinfo)

# Context window size, in tokens.
n_ctx=4096

# Run everything on CPU: offload zero layers to the GPU.
n_gpu_layers=0

# Default batch size.
# NOTE(review): 2096 looks like a typo for the usual power-of-two 2048 — confirm
# intent before changing, since it affects server behavior.
n_batch=2096

# If total RAM is under ~8 GB, drop to a smaller batch size.
# MemTotal in /proc/meminfo is reported in kB, so 8000000 ≈ 8 GB.
total_ram=$(awk '/^MemTotal:/ {print $2}' /proc/meminfo)
if [ "$total_ram" -lt 8000000 ]; then
  n_batch=1024
fi

echo "Initializing server with:"
echo "Batch size: $n_batch"
echo "Number of CPU threads: $n_threads"
echo "Number of GPU layers: $n_gpu_layers"
echo "Context window: $n_ctx"

# Replace this shell with the server process so signals reach it directly.
exec python3 -m llama_cpp.server \
  --n_ctx "$n_ctx" \
  --n_threads "$n_threads" \
  --n_gpu_layers "$n_gpu_layers" \
  --n_batch "$n_batch"