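# Docker Compose configuration for LlamaGPT running the Llama 2 70B chat
# model: a llama-cpp-python API container plus a web UI container.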
version: '3.6'

services:
  llama-gpt-api-70b:
    # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest'
    build:
      context: ./api
      dockerfile: 70B.Dockerfile
    restart: on-failure
    environment:
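      # Path to the GGML model file inside the container; the api image is
      # assumed to download or mount it at this location.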
      MODEL: '/models/llama-2-70b-chat.bin'
      # Llama 2 70B's grouping factor is 8, compared to 1 for the 7B and 13B
      # models. Currently, it's not possible to change this using --n_gqa with
      # llama-cpp-python in run.sh, so we expose it as an environment variable.
      # See: https://github.com/abetlen/llama-cpp-python/issues/528
      # and: https://github.com/facebookresearch/llama/issues/407
      N_GQA: '8'
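      # mlock the model's pages into RAM so they aren't swapped out; this is
      # what the IPC_LOCK capability below is granted for.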
      USE_MLOCK: 1
    cap_add:
      - IPC_LOCK

  llama-gpt-ui:
    image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
    ports:
      - 3000:3000
    restart: on-failure
    environment:
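      # The UI talks to the local OpenAI-compatible API, so the key below is
      # presumably just a non-empty placeholder rather than a real OpenAI key.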
      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
      - 'OPENAI_API_HOST=http://llama-gpt-api-70b:8000'
      - 'DEFAULT_MODEL=/models/llama-2-70b-chat.bin'
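      # WAIT_HOSTS/WAIT_TIMEOUT appear to be docker-compose-wait settings: the
      # UI blocks at startup until the API port answers, allowing up to 600
      # seconds for the 70B model to load.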
      - 'WAIT_HOSTS=llama-gpt-api-70b:8000'
      - 'WAIT_TIMEOUT=600'