From ee97955bb7650cd4a1d85f93e0adadb032598366 Mon Sep 17 00:00:00 2001 From: Mayank Chhabra Date: Tue, 15 Aug 2023 23:11:39 +0700 Subject: [PATCH] Add support for 13B and 70B models, workflow, readme --- .github/workflows/on-push.yml | 32 +++++++++++++ license => LICENSE.md | 0 README.md | 84 +++++++++++++++++++++++++++++++++++ api/13B.Dockerfile | 26 +++++++++++ api/70B.Dockerfile | 26 +++++++++++ docker-compose-13b.yml | 16 +++++++ docker-compose-70b.yml | 16 +++++++ docker-compose.yml | 2 + ui/types/openai.ts | 16 ++++++- 9 files changed, 217 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/on-push.yml rename license => LICENSE.md (100%) create mode 100644 README.md create mode 100644 api/13B.Dockerfile create mode 100644 api/70B.Dockerfile create mode 100644 docker-compose-13b.yml create mode 100644 docker-compose-70b.yml diff --git a/.github/workflows/on-push.yml b/.github/workflows/on-push.yml new file mode 100644 index 0000000..00d925c --- /dev/null +++ b/.github/workflows/on-push.yml @@ -0,0 +1,32 @@ +name: Build Docker images on master push + +on: + push: + branches: + - master + +jobs: + build_api: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3 + - run: docker login --username "${{ github.actor }}" --password ${{ secrets.GITHUB_TOKEN }} ghcr.io + - run: docker buildx create --use + # 7B + - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-7b-chat:${{ github.sha }} --push . + - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-7b-chat:latest --push . + # 13B + - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/13B.Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:${{ github.sha }} --push . 
+ - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/13B.Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:latest --push . + # 70B + - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/70B.Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:${{ github.sha }} --push . + - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/70B.Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest --push . + + build_ui: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3 + - run: docker login --username "${{ github.actor }}" --password ${{ secrets.GITHUB_TOKEN }} ghcr.io + - run: docker buildx create --use + - run: docker buildx build --platform linux/amd64,linux/arm64 -f ui/Dockerfile --tag ghcr.io/getumbrel/llama-gpt-ui:${{ github.sha }} --push . + - run: docker buildx build --platform linux/amd64,linux/arm64 -f ui/Dockerfile --tag ghcr.io/getumbrel/llama-gpt-ui:latest --push . diff --git a/license b/LICENSE.md similarity index 100% rename from license rename to LICENSE.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..6d4f816 --- /dev/null +++ b/README.md @@ -0,0 +1,84 @@ +

+ + LlamaGPT + +

+

+

LlamaGPT

+

+ A self-hosted, offline, ChatGPT-like chatbot, powered by Llama 2. 100% private, with no data leaving your device. +
+ umbrel.com ยป +
+
+ + + + + + + + + + + + +

+

+ +## Demo + +https://github.com/getumbrel/llama-gpt/assets/10330103/71521963-6df2-4ffb-8fe1-f079e80d6a8b + +## How to install + +### Install LlamaGPT on your umbrelOS home server + +Running LlamaGPT on an [umbrelOS](https://umbrel.com) home server is one click. Simply install it from the [Umbrel App Store](https://apps.umbrel.com/app/llama-gpt). + + + +[![LlamaGPT on Umbrel App Store](https://apps.umbrel.com/app/nostr-relay/badge-dark.svg)](https://apps.umbrel.com/app/llama-gpt) + +### Install LlamaGPT anywhere else + +You can run LlamaGPT on any x86 or arm64 system. Make sure you have Docker installed. + +Then, clone this repo and `cd` into it: + +``` +git clone https://github.com/getumbrel/llama-gpt.git +cd llama-gpt +``` + +You can now run LlamaGPT with any of the following models depending upon your hardware: + +| Model size | Model used | Minimum RAM required | How to start LlamaGPT | +| ---------- | ----------------------------------- | -------------------- | ------------------------------------------------ | +| 7B | Nous Hermes Llama 2 7B (GGML q4_0) | 8GB | `docker compose up -d` | +| 13B | Nous Hermes Llama 2 13B (GGML q4_0) | 16GB | `docker compose -f docker-compose-13b.yml up -d` | +| 70B | Meta Llama 2 70B Chat (GGML q4_0) | 48GB | `docker compose -f docker-compose-70b.yml up -d` | + +You can access LlamaGPT at `http://localhost:3000`. + +To stop LlamaGPT, run: + +``` +docker compose down +``` + +## Acknowledgements + +A massive thank you to the following developers and teams for making LlamaGPT possible: + +- [Mckay Wrigley](https://github.com/mckaywrigley) for building [Chatbot UI](https://github.com/mckaywrigley). +- [Andrei](https://github.com/abetlen) for building the [Python bindings for llama.cpp](https://github.com/abetlen/llama-cpp-python). +- [NousResearch](https://nousresearch.com) for [fine-tuning the Llama 2 7B and 13B models](https://huggingface.co/NousResearch). 
+- [Tom Jobbins](https://huggingface.co/TheBloke) for [quantizing the Llama 2 models](https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML). +- [Meta](https://ai.meta.com/llama) for releasing Llama 2 under a permissive license. + +--- + +[![License](https://img.shields.io/github/license/getumbrel/llama-gpt?color=%235351FB)](https://github.com/getumbrel/llama-gpt/blob/master/LICENSE.md) + +[umbrel.com](https://umbrel.com) diff --git a/api/13B.Dockerfile b/api/13B.Dockerfile new file mode 100644 index 0000000..474d77f --- /dev/null +++ b/api/13B.Dockerfile @@ -0,0 +1,26 @@ +# Define the image argument and provide a default value +ARG IMAGE=ghcr.io/abetlen/llama-cpp-python:latest + +# Define the model file name and download url +ARG MODEL_FILE=llama-2-13b-chat.bin +ARG MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Nous-Hermes-Llama2-GGML/resolve/main/nous-hermes-llama2-13b.ggmlv3.q4_0.bin + +FROM ${IMAGE} + +ARG MODEL_FILE +ARG MODEL_DOWNLOAD_URL + +# Download the model file +RUN apt-get update -y && \ + apt-get install --yes curl && \ + mkdir -p /models && \ + curl -L -o /models/${MODEL_FILE} ${MODEL_DOWNLOAD_URL} + +WORKDIR /app + +COPY . . 
+ +EXPOSE 8000 + +# Run the server start script +CMD ["/bin/sh", "/app/run.sh"] \ No newline at end of file diff --git a/api/70B.Dockerfile b/api/70B.Dockerfile new file mode 100644 index 0000000..036797b --- /dev/null +++ b/api/70B.Dockerfile @@ -0,0 +1,26 @@ +# Define the image argument and provide a default value +ARG IMAGE=ghcr.io/abetlen/llama-cpp-python:latest + +# Define the model file name and download url +ARG MODEL_FILE=llama-2-70b-chat.bin +ARG MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGML/resolve/main/llama-2-70b-chat.ggmlv3.q4_0.bin + +FROM ${IMAGE} + +ARG MODEL_FILE +ARG MODEL_DOWNLOAD_URL + +# Download the model file +RUN apt-get update -y && \ + apt-get install --yes curl && \ + mkdir -p /models && \ + curl -L -o /models/${MODEL_FILE} ${MODEL_DOWNLOAD_URL} + +WORKDIR /app + +COPY . . + +EXPOSE 8000 + +# Run the server start script +CMD ["/bin/sh", "/app/run.sh"] \ No newline at end of file diff --git a/docker-compose-13b.yml b/docker-compose-13b.yml new file mode 100644 index 0000000..13e4d0f --- /dev/null +++ b/docker-compose-13b.yml @@ -0,0 +1,16 @@ +version: '3.6' + +services: + llama-gpt-api: + image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:latest' + environment: + MODEL: '/models/llama-2-13b-chat.bin' + + llama-gpt-ui: + image: 'ghcr.io/getumbrel/llama-gpt-ui:latest' + ports: + - 3000:3000 + environment: + - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' + - 'OPENAI_API_HOST=http://llama-gpt-api:8000' + - 'DEFAULT_MODEL=/models/llama-2-13b-chat.bin' diff --git a/docker-compose-70b.yml b/docker-compose-70b.yml new file mode 100644 index 0000000..4ae318e --- /dev/null +++ b/docker-compose-70b.yml @@ -0,0 +1,16 @@ +version: '3.6' + +services: + llama-gpt-api: + image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest' + environment: + MODEL: '/models/llama-2-70b-chat.bin' + + llama-gpt-ui: + image: 'ghcr.io/getumbrel/llama-gpt-ui:latest' + ports: + - 3000:3000 + environment: + - 
'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' + - 'OPENAI_API_HOST=http://llama-gpt-api:8000' + - 'DEFAULT_MODEL=/models/llama-2-70b-chat.bin' diff --git a/docker-compose.yml b/docker-compose.yml index 1557e63..e1e447a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,6 +2,7 @@ version: '3.6' services: llama-gpt-api: + # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-7b-chat:latest' build: context: ./api dockerfile: Dockerfile @@ -9,6 +10,7 @@ services: MODEL: '/models/llama-2-7b-chat.bin' llama-gpt-ui: + # image: 'ghcr.io/getumbrel/llama-gpt-ui:latest' build: context: ./ui dockerfile: Dockerfile diff --git a/ui/types/openai.ts b/ui/types/openai.ts index c4d2ba6..5c97b7a 100644 --- a/ui/types/openai.ts +++ b/ui/types/openai.ts @@ -13,6 +13,8 @@ export enum OpenAIModelID { GPT_4 = 'gpt-4', GPT_4_32K = 'gpt-4-32k', LLAMA_7B_CHAT_GGMLV3_Q4_0 = '/models/llama-2-7b-chat.bin', + LLAMA_13B_CHAT_GGMLV3_Q4_0 = '/models/llama-2-13b-chat.bin', + LLAMA_70B_CHAT_GGMLV3_Q4_0 = '/models/llama-2-70b-chat.bin', } // in case the `DEFAULT_MODEL` environment variable is not set or set to an unsupported model @@ -45,7 +47,19 @@ export const OpenAIModels: Record = { }, [OpenAIModelID.LLAMA_7B_CHAT_GGMLV3_Q4_0]: { id: OpenAIModelID.LLAMA_7B_CHAT_GGMLV3_Q4_0, - name: 'Llama 2 Chat 7B', + name: 'Llama 2 7B', + maxLength: 12000, + tokenLimit: 4000, + }, + [OpenAIModelID.LLAMA_13B_CHAT_GGMLV3_Q4_0]: { + id: OpenAIModelID.LLAMA_13B_CHAT_GGMLV3_Q4_0, + name: 'Llama 2 13B', + maxLength: 12000, + tokenLimit: 4000, + }, + [OpenAIModelID.LLAMA_70B_CHAT_GGMLV3_Q4_0]: { + id: OpenAIModelID.LLAMA_70B_CHAT_GGMLV3_Q4_0, + name: 'Llama 2 70B', maxLength: 12000, tokenLimit: 4000, },