feat: initial commit
This commit is contained in:
commit
48bded9bf1
2
.dockerignore
Normal file
2
.dockerignore
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
.env
|
||||||
|
env
|
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
.env
|
||||||
|
model
|
||||||
|
__pycache__
|
1
.tool-versions
Normal file
1
.tool-versions
Normal file
@ -0,0 +1 @@
|
|||||||
|
python 3.10.13
|
23
Dockerfile
Normal file
23
Dockerfile
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
FROM python:3.10-bookworm AS base

# --- tini: a minimal init that reaps zombies and forwards signals,
# so the Python server shuts down cleanly inside the container.
FROM base AS tini

# key=value form; the space-separated ENV form is deprecated by Docker.
ENV TINI_VERSION=v0.19.0
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
RUN chmod +x /tini
ENTRYPOINT ["/tini", "--"]

# --- tools: OS security updates plus Python dependencies.
# The cache mounts keep apt package lists/archives out of the image layers.
FROM tini AS tools
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    export DEBIAN_FRONTEND=noninteractive \
    && apt-get update -y \
    && apt-get upgrade -y

COPY requirements.txt requirements.txt
# --no-cache-dir keeps pip's download cache out of the final image layer.
RUN pip install --no-cache-dir -r requirements.txt

# --- api: application code and entry point.
FROM tools AS api

COPY . .
# Runs the package entry point (__main__.py) in the current directory.
CMD ["python", "."]
|
11
README.md
Normal file
11
README.md
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# Open LLaMA
|
||||||
|
|
||||||
|
Run [OpenLLaMA](https://github.com/openlm-research/open_llama) in a GPU environment with a single command. 📡
|
||||||
|
|
||||||
|
## Speed Run
|
||||||
|
|
||||||
|
1. Sign up for [Beam](http://beam.cloud)
|
||||||
|
2. Download the CLI and Python SDK
|
||||||
|
3. Clone this template locally: `beam create-app openllama`
|
||||||
|
4. Spin up a GPU environment to run inference: `beam start app.py`
|
||||||
|
5. Deploy the app as a web API: `beam deploy app.py`
|
43
__main__.py
Normal file
43
__main__.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
from bottle import request, response, post, template, HTTPResponse, run
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from os import getenv
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration
|
||||||
|
|
||||||
|
# Load .env before anything reads the environment (the original called
# load_dotenv() after the model download, so env vars that influence
# caching/auth would arrive too late).
load_dotenv()

# Comma-separated list of accepted API keys, e.g. API_KEYS=abc,def
raw_keys = getenv('API_KEYS')
if raw_keys is None:
    # Fail fast with a clear message instead of an AttributeError on None.
    raise RuntimeError("API_KEYS environment variable is not set")
api_keys = raw_keys.split(sep=",")

tokenizer = T5Tokenizer.from_pretrained(
    "google/flan-t5-xl",
    cache_dir = "model",
)

# cache_dir matches the tokenizer's (and the .gitignore'd `model/` dir)
# so the weights persist on disk instead of being re-downloaded.
model = T5ForConditionalGeneration.from_pretrained(
    "google/flan-t5-xl",
    cache_dir = "model",
)

generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
|
||||||
|
|
||||||
|
@post('/')
def gen():
    """Generate text for a JSON prompt.

    Expects an ``Authorization: X-Api-Key <key>`` header and a body of
    the form ``{"input": "<prompt>"}``; returns
    ``{"output": "<generated text>"}``.

    Raises:
        HTTPResponse: 401 for missing/malformed/invalid credentials,
            400 for a missing or malformed JSON body — instead of the
            unhandled-exception 500s the original produced.
    """
    auth = request.get_header('authorization')
    if auth is None:
        raise HTTPResponse(status=401)

    # Split on the first space only; a header with no (or extra) spaces
    # previously raised ValueError on unpack and surfaced as a 500.
    parts = auth.split(sep=" ", maxsplit=1)
    if len(parts) != 2:
        raise HTTPResponse(status=401)
    scheme, val = parts

    if scheme != 'X-Api-Key' or val not in api_keys:
        raise HTTPResponse(status=401)

    # request.json is None when the body is absent or not JSON.
    body = request.json
    if body is None or "input" not in body:
        raise HTTPResponse(status=400)

    # Renamed from `input` to avoid shadowing the builtin.
    prompt = body["input"]
    output = generator(prompt)

    print(prompt)
    print(output)
    return {"output": output[0]["generated_text"]}
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Entry point: serve the API on every interface, port 9010.
    run(port=9010, host='0.0.0.0')
|
7
requirements.txt
Normal file
7
requirements.txt
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
python-dotenv~=1.0.0
|
||||||
|
torch
|
||||||
|
accelerate
|
||||||
|
sentencepiece
|
||||||
|
protobuf
|
||||||
|
transformers
|
||||||
|
bottle
|
Loading…
Reference in New Issue
Block a user