From ee97955bb7650cd4a1d85f93e0adadb032598366 Mon Sep 17 00:00:00 2001
From: Mayank Chhabra
Date: Tue, 15 Aug 2023 23:11:39 +0700
Subject: [PATCH] Add support for 13B and 70B models, workflow, readme
---
.github/workflows/on-push.yml | 32 +++++++++++++
license => LICENSE.md | 0
README.md | 84 +++++++++++++++++++++++++++++++++++
api/13B.Dockerfile | 26 +++++++++++
api/70B.Dockerfile | 26 +++++++++++
docker-compose-13b.yml | 16 +++++++
docker-compose-70b.yml | 16 +++++++
docker-compose.yml | 2 +
ui/types/openai.ts | 16 ++++++-
9 files changed, 217 insertions(+), 1 deletion(-)
create mode 100644 .github/workflows/on-push.yml
rename license => LICENSE.md (100%)
create mode 100644 README.md
create mode 100644 api/13B.Dockerfile
create mode 100644 api/70B.Dockerfile
create mode 100644 docker-compose-13b.yml
create mode 100644 docker-compose-70b.yml
diff --git a/.github/workflows/on-push.yml b/.github/workflows/on-push.yml
new file mode 100644
index 0000000..00d925c
--- /dev/null
+++ b/.github/workflows/on-push.yml
@@ -0,0 +1,32 @@
+name: Build Docker images on master push
+
+on:
+ push:
+ branches:
+ - master
+
+jobs:
+ build_api:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
+ - run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login --username "${{ github.actor }}" --password-stdin ghcr.io
+ - run: docker buildx create --use
+ # 7B
+ - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-7b-chat:${{ github.sha }} --push .
+ - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-7b-chat:latest --push .
+ # 13B
+ - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/13B.Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:${{ github.sha }} --push .
+ - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/13B.Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:latest --push .
+ # 70B
+ - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/70B.Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:${{ github.sha }} --push .
+ - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/70B.Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest --push .
+
+ build_ui:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
+ - run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login --username "${{ github.actor }}" --password-stdin ghcr.io
+ - run: docker buildx create --use
+ - run: docker buildx build --platform linux/amd64,linux/arm64 -f ui/Dockerfile --tag ghcr.io/getumbrel/llama-gpt-ui:${{ github.sha }} --push .
+ - run: docker buildx build --platform linux/amd64,linux/arm64 -f ui/Dockerfile --tag ghcr.io/getumbrel/llama-gpt-ui:latest --push .
diff --git a/license b/LICENSE.md
similarity index 100%
rename from license
rename to LICENSE.md
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6d4f816
--- /dev/null
+++ b/README.md
@@ -0,0 +1,84 @@
+
+
+
+
+
+
+
LlamaGPT
+
+ A self-hosted, offline, ChatGPT-like chatbot, powered by Llama 2. 100% private, with no data leaving your device.
+
+ umbrel.com ยป
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+## Demo
+
+https://github.com/getumbrel/llama-gpt/assets/10330103/71521963-6df2-4ffb-8fe1-f079e80d6a8b
+
+## How to install
+
+### Install LlamaGPT on your umbrelOS home server
+
+Running LlamaGPT on an [umbrelOS](https://umbrel.com) home server takes just one click. Simply install it from the [Umbrel App Store](https://apps.umbrel.com/app/llama-gpt).
+
+
+
+[](https://apps.umbrel.com/app/llama-gpt)
+
+### Install LlamaGPT anywhere else
+
+You can run LlamaGPT on any x86 or arm64 system. Make sure you have Docker installed.
+
+Then, clone this repo and `cd` into it:
+
+```
+git clone https://github.com/getumbrel/llama-gpt.git
+cd llama-gpt
+```
+
+You can now run LlamaGPT with any of the following models depending upon your hardware:
+
+| Model size | Model used | Minimum RAM required | How to start LlamaGPT |
+| ---------- | ----------------------------------- | -------------------- | ------------------------------------------------ |
+| 7B | Nous Hermes Llama 2 7B (GGML q4_0) | 8GB | `docker compose up -d` |
+| 13B | Nous Hermes Llama 2 13B (GGML q4_0) | 16GB | `docker compose -f docker-compose-13b.yml up -d` |
+| 70B | Meta Llama 2 70B Chat (GGML q4_0) | 48GB | `docker compose -f docker-compose-70b.yml up -d` |
+
+You can access LlamaGPT at `http://localhost:3000`.
+
+To stop LlamaGPT, run:
+
+```
+docker compose down
+```
+
+## Acknowledgements
+
+A massive thank you to the following developers and teams for making LlamaGPT possible:
+
+- [Mckay Wrigley](https://github.com/mckaywrigley) for building [Chatbot UI](https://github.com/mckaywrigley/chatbot-ui).
+- [Andrei](https://github.com/abetlen) for building the [Python bindings for llama.cpp](https://github.com/abetlen/llama-cpp-python).
+- [NousResearch](https://nousresearch.com) for [fine-tuning the Llama 2 7B and 13B models](https://huggingface.co/NousResearch).
+- [Tom Jobbins](https://huggingface.co/TheBloke) for [quantizing the Llama 2 models](https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML).
+- [Meta](https://ai.meta.com/llama) for releasing Llama 2 under a permissive license.
+
+---
+
+[](https://github.com/getumbrel/llama-gpt/blob/master/LICENSE.md)
+
+[umbrel.com](https://umbrel.com)
diff --git a/api/13B.Dockerfile b/api/13B.Dockerfile
new file mode 100644
index 0000000..474d77f
--- /dev/null
+++ b/api/13B.Dockerfile
@@ -0,0 +1,26 @@
+# Define the image argument and provide a default value
+ARG IMAGE=ghcr.io/abetlen/llama-cpp-python:latest
+
+# Define the model file name and download url
+ARG MODEL_FILE=llama-2-13b-chat.bin
+ARG MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Nous-Hermes-Llama2-GGML/resolve/main/nous-hermes-llama2-13b.ggmlv3.q4_0.bin
+
+FROM ${IMAGE}
+
+ARG MODEL_FILE
+ARG MODEL_DOWNLOAD_URL
+
+# Download the model file
+RUN apt-get update -y && \
+ apt-get install --yes curl && \
+ mkdir -p /models && \
+ curl -L -o /models/${MODEL_FILE} ${MODEL_DOWNLOAD_URL}
+
+WORKDIR /app
+
+COPY . .
+
+EXPOSE 8000
+
+# Run the server start script
+CMD ["/bin/sh", "/app/run.sh"]
\ No newline at end of file
diff --git a/api/70B.Dockerfile b/api/70B.Dockerfile
new file mode 100644
index 0000000..036797b
--- /dev/null
+++ b/api/70B.Dockerfile
@@ -0,0 +1,26 @@
+# Define the image argument and provide a default value
+ARG IMAGE=ghcr.io/abetlen/llama-cpp-python:latest
+
+# Define the model file name and download url
+ARG MODEL_FILE=llama-2-70b-chat.bin
+ARG MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGML/resolve/main/llama-2-70b-chat.ggmlv3.q4_0.bin
+
+FROM ${IMAGE}
+
+ARG MODEL_FILE
+ARG MODEL_DOWNLOAD_URL
+
+# Download the model file
+RUN apt-get update -y && \
+ apt-get install --yes curl && \
+ mkdir -p /models && \
+ curl -L -o /models/${MODEL_FILE} ${MODEL_DOWNLOAD_URL}
+
+WORKDIR /app
+
+COPY . .
+
+EXPOSE 8000
+
+# Run the server start script
+CMD ["/bin/sh", "/app/run.sh"]
\ No newline at end of file
diff --git a/docker-compose-13b.yml b/docker-compose-13b.yml
new file mode 100644
index 0000000..13e4d0f
--- /dev/null
+++ b/docker-compose-13b.yml
@@ -0,0 +1,16 @@
+version: '3.6'
+
+services:
+ llama-gpt-api:
+ image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:latest'
+ environment:
+ MODEL: '/models/llama-2-13b-chat.bin'
+
+ llama-gpt-ui:
+ image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
+ ports:
+ - 3000:3000
+ environment:
+ - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
+ - 'OPENAI_API_HOST=http://llama-gpt-api:8000'
+ - 'DEFAULT_MODEL=/models/llama-2-13b-chat.bin'
diff --git a/docker-compose-70b.yml b/docker-compose-70b.yml
new file mode 100644
index 0000000..4ae318e
--- /dev/null
+++ b/docker-compose-70b.yml
@@ -0,0 +1,16 @@
+version: '3.6'
+
+services:
+ llama-gpt-api:
+ image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest'
+ environment:
+ MODEL: '/models/llama-2-70b-chat.bin'
+
+ llama-gpt-ui:
+ image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
+ ports:
+ - 3000:3000
+ environment:
+ - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
+ - 'OPENAI_API_HOST=http://llama-gpt-api:8000'
+ - 'DEFAULT_MODEL=/models/llama-2-70b-chat.bin'
diff --git a/docker-compose.yml b/docker-compose.yml
index 1557e63..e1e447a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,6 +2,7 @@ version: '3.6'
services:
llama-gpt-api:
+ # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-7b-chat:latest'
build:
context: ./api
dockerfile: Dockerfile
@@ -9,6 +10,7 @@ services:
MODEL: '/models/llama-2-7b-chat.bin'
llama-gpt-ui:
+ # image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
build:
context: ./ui
dockerfile: Dockerfile
diff --git a/ui/types/openai.ts b/ui/types/openai.ts
index c4d2ba6..5c97b7a 100644
--- a/ui/types/openai.ts
+++ b/ui/types/openai.ts
@@ -13,6 +13,8 @@ export enum OpenAIModelID {
GPT_4 = 'gpt-4',
GPT_4_32K = 'gpt-4-32k',
LLAMA_7B_CHAT_GGMLV3_Q4_0 = '/models/llama-2-7b-chat.bin',
+ LLAMA_13B_CHAT_GGMLV3_Q4_0 = '/models/llama-2-13b-chat.bin',
+ LLAMA_70B_CHAT_GGMLV3_Q4_0 = '/models/llama-2-70b-chat.bin',
}
// in case the `DEFAULT_MODEL` environment variable is not set or set to an unsupported model
@@ -45,7 +47,19 @@ export const OpenAIModels: Record = {
},
[OpenAIModelID.LLAMA_7B_CHAT_GGMLV3_Q4_0]: {
id: OpenAIModelID.LLAMA_7B_CHAT_GGMLV3_Q4_0,
- name: 'Llama 2 Chat 7B',
+ name: 'Llama 2 7B',
+ maxLength: 12000,
+ tokenLimit: 4000,
+ },
+ [OpenAIModelID.LLAMA_13B_CHAT_GGMLV3_Q4_0]: {
+ id: OpenAIModelID.LLAMA_13B_CHAT_GGMLV3_Q4_0,
+ name: 'Llama 2 13B',
+ maxLength: 12000,
+ tokenLimit: 4000,
+ },
+ [OpenAIModelID.LLAMA_70B_CHAT_GGMLV3_Q4_0]: {
+ id: OpenAIModelID.LLAMA_70B_CHAT_GGMLV3_Q4_0,
+ name: 'Llama 2 70B',
maxLength: 12000,
tokenLimit: 4000,
},