From 9d086c2e28f6580b8229081569e94bbae26a83d3 Mon Sep 17 00:00:00 2001 From: Mayank Chhabra Date: Fri, 18 Aug 2023 03:11:42 +0700 Subject: [PATCH] Lock 13B and 70B models in memory for faster inference --- docker-compose-13b.yml | 3 +++ docker-compose-70b.yml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/docker-compose-13b.yml b/docker-compose-13b.yml index 0c4ba52..20e1f7b 100644 --- a/docker-compose-13b.yml +++ b/docker-compose-13b.yml @@ -9,6 +9,9 @@ services: restart: on-failure environment: MODEL: '/models/llama-2-13b-chat.bin' + USE_MLOCK: 1 + cap_add: + - IPC_LOCK llama-gpt-ui: image: 'ghcr.io/getumbrel/llama-gpt-ui:latest' diff --git a/docker-compose-70b.yml b/docker-compose-70b.yml index cee2e8a..91fa5ab 100644 --- a/docker-compose-70b.yml +++ b/docker-compose-70b.yml @@ -15,6 +15,9 @@ services: # See: https://github.com/abetlen/llama-cpp-python/issues/528 # and: https://github.com/facebookresearch/llama/issues/407 N_GQA: '8' + USE_MLOCK: 1 + cap_add: + - IPC_LOCK llama-gpt-ui: image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'