Lock 13B and 70B models in memory for faster inference
This commit is contained in:
parent
339cf66be6
commit
9d086c2e28
|
@ -9,6 +9,9 @@ services:
|
||||||
restart: on-failure
|
restart: on-failure
|
||||||
environment:
|
environment:
|
||||||
MODEL: '/models/llama-2-13b-chat.bin'
|
MODEL: '/models/llama-2-13b-chat.bin'
|
||||||
|
USE_MLOCK: 1
|
||||||
|
cap_add:
|
||||||
|
- IPC_LOCK
|
||||||
|
|
||||||
llama-gpt-ui:
|
llama-gpt-ui:
|
||||||
image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
|
image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
|
||||||
|
|
|
@ -15,6 +15,9 @@ services:
|
||||||
# See: https://github.com/abetlen/llama-cpp-python/issues/528
|
# See: https://github.com/abetlen/llama-cpp-python/issues/528
|
||||||
# and: https://github.com/facebookresearch/llama/issues/407
|
# and: https://github.com/facebookresearch/llama/issues/407
|
||||||
N_GQA: '8'
|
N_GQA: '8'
|
||||||
|
USE_MLOCK: 1
|
||||||
|
cap_add:
|
||||||
|
- IPC_LOCK
|
||||||
|
|
||||||
llama-gpt-ui:
|
llama-gpt-ui:
|
||||||
image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
|
image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
|
||||||
|
|
Loading…
Reference in New Issue