Lock 13B and 70B models in memory for faster inference

2023-08-18 03:11:42 +07:00 · 2023-08-18 03:11:42 +07:00 · 9d086c2e28
parent 339cf66be6
commit 9d086c2e28
2 changed files with 6 additions and 0 deletions
--- a/docker-compose-13b.yml
+++ b/docker-compose-13b.yml
@@ -9,6 +9,9 @@ services:
    restart: on-failure
    environment:
      MODEL: '/models/llama-2-13b-chat.bin'
+      USE_MLOCK: 1
+    cap_add:
+      - IPC_LOCK

  llama-gpt-ui:
    image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
--- a/docker-compose-70b.yml
+++ b/docker-compose-70b.yml
@@ -15,6 +15,9 @@ services:
      # See: https://github.com/abetlen/llama-cpp-python/issues/528
      # and: https://github.com/facebookresearch/llama/issues/407
      N_GQA: '8'
+      USE_MLOCK: 1
+    cap_add:
+      - IPC_LOCK

  llama-gpt-ui:
    image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'