# llama-gpt/docker-compose-70b.yml
#
# 33 lines
# 1.0 KiB
# YAML

# Compose stack for the Llama 2 70B chat model: one API service built from
# ./api and one web UI service that talks to it over the Compose network.
version: '3.6'

services:
  # API backend; reachable from other services as llama-gpt-api-70b.
  llama-gpt-api-70b:
    # Commented-out image reference, kept as-is; presumably a prebuilt
    # alternative to the local build below.
    # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest'
    build:
      context: ./api
      dockerfile: 70B.Dockerfile
    restart: on-failure
    environment:
      # Path of the model file inside the container.
      MODEL: '/models/llama-2-70b-chat.bin'
      # Llama 2 70B's grouping factor is 8 compared to 7B and 13B's 1. Currently,
      # it's not possible to change this using --n_gqa with llama-cpp-python in
      # run.sh, so we expose it as an environment variable.
      # See: https://github.com/abetlen/llama-cpp-python/issues/528
      # and: https://github.com/facebookresearch/llama/issues/407
      N_GQA: '8'
      # Quoted so the value is an explicit string, matching N_GQA above;
      # the container still receives USE_MLOCK=1.
      USE_MLOCK: '1'
    # IPC_LOCK is the Linux capability that permits locking memory (mlock);
    # presumably needed because of USE_MLOCK above -- confirm against run.sh.
    cap_add:
      - IPC_LOCK

  # Web UI; published on host port 3000.
  llama-gpt-ui:
    image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
    ports:
      # Quoted: HOST:CONTAINER mappings should always be strings so the YAML
      # parser never interprets them as a number.
      - '3000:3000'
    restart: on-failure
    environment:
      # NOTE(review): looks like a placeholder key rather than a real secret.
      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
      # Points the UI at the API service defined above, on port 8000.
      - 'OPENAI_API_HOST=http://llama-gpt-api-70b:8000'
      # Same path as MODEL on the API service.
      - 'DEFAULT_MODEL=/models/llama-2-70b-chat.bin'
      # Presumably read by a wait-for-host helper in the UI image so it only
      # starts once the API port answers (timeout in seconds) -- TODO confirm.
      - 'WAIT_HOSTS=llama-gpt-api-70b:8000'
      - 'WAIT_TIMEOUT=600'