Lock 13B and 70B models in memory for faster inference

This commit is contained in:
Mayank Chhabra 2023-08-18 03:11:42 +07:00
parent 339cf66be6
commit 9d086c2e28
2 changed files with 6 additions and 0 deletions

View File

@ -9,6 +9,9 @@ services:
    restart: on-failure
    environment:
      MODEL: '/models/llama-2-13b-chat.bin'
USE_MLOCK: 1
cap_add:
- IPC_LOCK
  llama-gpt-ui:
    image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'

View File

@ -15,6 +15,9 @@ services:
      # See: https://github.com/abetlen/llama-cpp-python/issues/528
      # and: https://github.com/facebookresearch/llama/issues/407
      N_GQA: '8'
USE_MLOCK: 1
cap_add:
- IPC_LOCK
  llama-gpt-ui:
    image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'