Add support for 13B and 70B models, workflow, readme
parent 75cd9d075f
commit ee97955bb7
@@ -0,0 +1,32 @@
name: Build Docker images on master push

on:
  push:
    branches:
      - master

jobs:
  build_api:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
      - run: docker login --username "${{ github.actor }}" --password "${{ secrets.GITHUB_TOKEN }}" ghcr.io
      - run: docker buildx create --use
      # 7B
      - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-7b-chat:${{ github.sha }} --push .
      - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-7b-chat:latest --push .
      # 13B
      - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/13B.Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:${{ github.sha }} --push .
      - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/13B.Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:latest --push .
      # 70B
      - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/70B.Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:${{ github.sha }} --push .
      - run: docker buildx build --platform linux/amd64,linux/arm64 -f api/70B.Dockerfile --tag ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest --push .

  build_ui:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
      - run: docker login --username "${{ github.actor }}" --password "${{ secrets.GITHUB_TOKEN }}" ghcr.io
      - run: docker buildx create --use
      - run: docker buildx build --platform linux/amd64,linux/arm64 -f ui/Dockerfile --tag ghcr.io/getumbrel/llama-gpt-ui:${{ github.sha }} --push .
      - run: docker buildx build --platform linux/amd64,linux/arm64 -f ui/Dockerfile --tag ghcr.io/getumbrel/llama-gpt-ui:latest --push .
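After a push to master has run this workflow, one way to sanity-check the result is to inspect the pushed manifests on GHCR. A minimal sketch, assuming the packages are publicly readable:

```
# Confirm each pushed manifest carries both linux/amd64 and linux/arm64
docker buildx imagetools inspect ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:latest
docker buildx imagetools inspect ghcr.io/getumbrel/llama-gpt-ui:latest
```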
README.md
@@ -0,0 +1,84 @@
<p align="center">
  <a href="https://apps.umbrel.com/app/llama-gpt">
    <img width="150" height="150" src="https://i.imgur.com/F0h1k80.png" alt="LlamaGPT" />
  </a>
</p>
<p align="center">
  <h1 align="center">LlamaGPT</h1>
  <p align="center">
    A self-hosted, offline, ChatGPT-like chatbot, powered by Llama 2. 100% private, with no data leaving your device.
    <br />
    <a href="https://umbrel.com"><strong>umbrel.com »</strong></a>
    <br />
    <br />
    <a href="https://twitter.com/umbrel">
      <img src="https://img.shields.io/twitter/follow/umbrel?style=social" />
    </a>
    <a href="https://t.me/getumbrel">
      <img src="https://img.shields.io/badge/community-chat-%235351FB">
    </a>
    <a href="https://reddit.com/r/getumbrel">
      <img src="https://img.shields.io/reddit/subreddit-subscribers/getumbrel?style=social">
    </a>
    <a href="https://community.umbrel.com">
      <img src="https://img.shields.io/badge/community-forum-%235351FB">
    </a>
  </p>
</p>

## Demo

https://github.com/getumbrel/llama-gpt/assets/10330103/71521963-6df2-4ffb-8fe1-f079e80d6a8b

## How to install

### Install LlamaGPT on your umbrelOS home server

Installing LlamaGPT on an [umbrelOS](https://umbrel.com) home server takes just one click: simply install it from the [Umbrel App Store](https://apps.umbrel.com/app/llama-gpt).

<!-- Todo: update badge link after launch -->

[](https://apps.umbrel.com/app/llama-gpt)

### Install LlamaGPT anywhere else

You can run LlamaGPT on any x86 or arm64 system. Make sure you have Docker installed.

Then, clone this repo and `cd` into it:

```
git clone https://github.com/getumbrel/llama-gpt.git
cd llama-gpt
```

You can now run LlamaGPT with any of the following models, depending on your hardware:

| Model size | Model used                          | Minimum RAM required | How to start LlamaGPT                            |
| ---------- | ----------------------------------- | -------------------- | ------------------------------------------------ |
| 7B         | Nous Hermes Llama 2 7B (GGML q4_0)  | 8GB                  | `docker compose up -d`                           |
| 13B        | Nous Hermes Llama 2 13B (GGML q4_0) | 16GB                 | `docker compose -f docker-compose-13b.yml up -d` |
| 70B        | Meta Llama 2 70B Chat (GGML q4_0)   | 48GB                 | `docker compose -f docker-compose-70b.yml up -d` |

You can access LlamaGPT at `http://localhost:3000`.
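If the UI isn't responding right away, the containers may still be starting up. You can check their status and follow the API logs (shown here for the 13B setup; drop the `-f` flag if you're running the default 7B stack):

```
docker compose -f docker-compose-13b.yml ps
docker compose -f docker-compose-13b.yml logs -f llama-gpt-api
```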
To stop LlamaGPT, run (passing the same `-f` file you used to start it, if any):

```
docker compose down
```

## Acknowledgements

A massive thank you to the following developers and teams for making LlamaGPT possible:

- [Mckay Wrigley](https://github.com/mckaywrigley) for building [Chatbot UI](https://github.com/mckaywrigley/chatbot-ui).
- [Andrei](https://github.com/abetlen) for building the [Python bindings for llama.cpp](https://github.com/abetlen/llama-cpp-python).
- [NousResearch](https://nousresearch.com) for [fine-tuning the Llama 2 7B and 13B models](https://huggingface.co/NousResearch).
- [Tom Jobbins](https://huggingface.co/TheBloke) for [quantizing the Llama 2 models](https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML).
- [Meta](https://ai.meta.com/llama) for releasing Llama 2 under a permissive license.

---

[](https://github.com/getumbrel/llama-gpt/blob/master/LICENSE.md)

[umbrel.com](https://umbrel.com)
api/13B.Dockerfile
@@ -0,0 +1,26 @@
# Define the image argument and provide a default value
ARG IMAGE=ghcr.io/abetlen/llama-cpp-python:latest

# Define the model file name and download url
ARG MODEL_FILE=llama-2-13b-chat.bin
ARG MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Nous-Hermes-Llama2-GGML/resolve/main/nous-hermes-llama2-13b.ggmlv3.q4_0.bin

FROM ${IMAGE}

# Re-declare the args so they're available after FROM
ARG MODEL_FILE
ARG MODEL_DOWNLOAD_URL

# Download the model file
RUN apt-get update -y && \
    apt-get install --yes curl && \
    mkdir -p /models && \
    curl -L -o /models/${MODEL_FILE} ${MODEL_DOWNLOAD_URL}

WORKDIR /app

COPY . .

EXPOSE 8000

# Run the server start script
CMD ["/bin/sh", "/app/run.sh"]
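Because the base image and download URL are build args, you can build a local variant of this image without editing the file. A sketch, building from the repo root as the CI workflow does (`llama-gpt-api-13b` is just an illustrative local tag; keep `MODEL_FILE` in sync with the `MODEL` path in the compose file if you change the URL):

```
docker build -f api/13B.Dockerfile \
  --build-arg MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Nous-Hermes-Llama2-GGML/resolve/main/nous-hermes-llama2-13b.ggmlv3.q4_0.bin \
  -t llama-gpt-api-13b .
```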
api/70B.Dockerfile
@@ -0,0 +1,26 @@
# Define the image argument and provide a default value
ARG IMAGE=ghcr.io/abetlen/llama-cpp-python:latest

# Define the model file name and download url
ARG MODEL_FILE=llama-2-70b-chat.bin
ARG MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGML/resolve/main/llama-2-70b-chat.ggmlv3.q4_0.bin

FROM ${IMAGE}

# Re-declare the args so they're available after FROM
ARG MODEL_FILE
ARG MODEL_DOWNLOAD_URL

# Download the model file
RUN apt-get update -y && \
    apt-get install --yes curl && \
    mkdir -p /models && \
    curl -L -o /models/${MODEL_FILE} ${MODEL_DOWNLOAD_URL}

WORKDIR /app

COPY . .

EXPOSE 8000

# Run the server start script
CMD ["/bin/sh", "/app/run.sh"]
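The q4_0 70B weights are tens of gigabytes, so it's worth confirming the download completed before pushing or running the image. A quick check, assuming a local tag of `llama-gpt-api-70b` (`--entrypoint` is used so the check works regardless of what the base image defines):

```
# The model is baked in at build time; verify it landed in /models with a plausible size
docker run --rm --entrypoint ls llama-gpt-api-70b -lh /models
```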
docker-compose-13b.yml
@@ -0,0 +1,16 @@
version: '3.6'

services:
  llama-gpt-api:
    image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:latest'
    environment:
      MODEL: '/models/llama-2-13b-chat.bin'

  llama-gpt-ui:
    image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
    ports:
      - 3000:3000
    environment:
      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
      - 'OPENAI_API_HOST=http://llama-gpt-api:8000'
      - 'DEFAULT_MODEL=/models/llama-2-13b-chat.bin'
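Since the prebuilt image ships the model inside the container, you can confirm the `MODEL` path above actually exists once the stack is running. A sketch, using the service name from this file:

```
docker compose -f docker-compose-13b.yml up -d
docker compose -f docker-compose-13b.yml exec llama-gpt-api ls -lh /models
```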
docker-compose-70b.yml
@@ -0,0 +1,16 @@
version: '3.6'

services:
  llama-gpt-api:
    image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest'
    environment:
      MODEL: '/models/llama-2-70b-chat.bin'

  llama-gpt-ui:
    image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
    ports:
      - 3000:3000
    environment:
      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
      - 'OPENAI_API_HOST=http://llama-gpt-api:8000'
      - 'DEFAULT_MODEL=/models/llama-2-70b-chat.bin'
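The 70B stack needs roughly 48GB of RAM (see the README table), so a quick memory check before `up` can save a long wait on an out-of-memory crash. A sketch for Linux hosts:

```
free -h   # confirm enough free memory first
docker compose -f docker-compose-70b.yml up -d
```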
docker-compose.yml
@@ -2,6 +2,7 @@ version: '3.6'
 
 services:
   llama-gpt-api:
+    # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-7b-chat:latest'
     build:
       context: ./api
       dockerfile: Dockerfile
@@ -9,6 +10,7 @@ services:
       MODEL: '/models/llama-2-7b-chat.bin'
 
   llama-gpt-ui:
+    # image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
     build:
       context: ./ui
       dockerfile: Dockerfile
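With the `image:` lines left commented out, `docker compose up` builds the 7B api and ui from the local Dockerfiles; uncommenting them (and dropping the `build:` blocks) would switch to pulling the prebuilt images instead. Rebuilding after a local change looks like:

```
docker compose build
docker compose up -d
```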
@@ -13,6 +13,8 @@ export enum OpenAIModelID {
   GPT_4 = 'gpt-4',
   GPT_4_32K = 'gpt-4-32k',
   LLAMA_7B_CHAT_GGMLV3_Q4_0 = '/models/llama-2-7b-chat.bin',
+  LLAMA_13B_CHAT_GGMLV3_Q4_0 = '/models/llama-2-13b-chat.bin',
+  LLAMA_70B_CHAT_GGMLV3_Q4_0 = '/models/llama-2-70b-chat.bin',
 }
 
 // in case the `DEFAULT_MODEL` environment variable is not set or set to an unsupported model
@@ -45,7 +47,19 @@ export const OpenAIModels: Record<OpenAIModelID, OpenAIModel> = {
   },
   [OpenAIModelID.LLAMA_7B_CHAT_GGMLV3_Q4_0]: {
     id: OpenAIModelID.LLAMA_7B_CHAT_GGMLV3_Q4_0,
-    name: 'Llama 2 Chat 7B',
+    name: 'Llama 2 7B',
     maxLength: 12000,
     tokenLimit: 4000,
   },
+  [OpenAIModelID.LLAMA_13B_CHAT_GGMLV3_Q4_0]: {
+    id: OpenAIModelID.LLAMA_13B_CHAT_GGMLV3_Q4_0,
+    name: 'Llama 2 13B',
+    maxLength: 12000,
+    tokenLimit: 4000,
+  },
+  [OpenAIModelID.LLAMA_70B_CHAT_GGMLV3_Q4_0]: {
+    id: OpenAIModelID.LLAMA_70B_CHAT_GGMLV3_Q4_0,
+    name: 'Llama 2 70B',
+    maxLength: 12000,
+    tokenLimit: 4000,
+  },
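These enum values double as the model file paths the API is started with, which is why each compose file's `DEFAULT_MODEL` must match one of them exactly; otherwise the UI falls back as described in the comment above. One way to see which model id a running API reports, sketched here under the assumption that the llama-cpp-python server exposes the OpenAI-compatible `/v1/models` endpoint (curl is installed in the API image by the Dockerfiles above):

```
docker compose -f docker-compose-13b.yml exec llama-gpt-api curl -s http://localhost:8000/v1/models
```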