feat: initial commit
This commit is contained in:
commit
48bded9bf1
2
.dockerignore
Normal file
2
.dockerignore
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
.env
|
||||||
|
env
|
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
.env
|
||||||
|
model
|
||||||
|
__pycache__
|
1
.tool-versions
Normal file
1
.tool-versions
Normal file
@ -0,0 +1 @@
|
|||||||
|
python 3.10.13
|
23
Dockerfile
Normal file
23
Dockerfile
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
FROM python:3.10-bookworm AS base

# --- tini: a minimal init that reaps zombies and forwards signals,
# so the Python server shuts down cleanly inside the container.
FROM base AS tini

# key=value form; the space-separated ENV form is deprecated by Docker.
ENV TINI_VERSION=v0.19.0
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
RUN chmod +x /tini
ENTRYPOINT ["/tini", "--"]

# --- tools: OS security updates plus Python dependencies.
# The cache mounts keep apt package lists/archives out of the image layers.
FROM tini AS tools
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    export DEBIAN_FRONTEND=noninteractive \
    && apt-get update -y \
    && apt-get upgrade -y

COPY requirements.txt requirements.txt
# --no-cache-dir keeps pip's download cache out of the final image layer.
RUN pip install --no-cache-dir -r requirements.txt

# --- api: application code and entry point.
FROM tools AS api

COPY . .
# Runs the package entry point (__main__.py) in the current directory.
CMD ["python", "."]
|
11
README.md
Normal file
11
README.md
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# Open LLaMA
|
||||||
|
|
||||||
|
Run [OpenLLaMA](https://github.com/openlm-research/open_llama) in a GPU environment with a single command. 📡
|
||||||
|
|
||||||
|
## Speed Run
|
||||||
|
|
||||||
|
1. Sign up for [Beam](http://beam.cloud)
|
||||||
|
2. Download the CLI and Python SDK
|
||||||
|
3. Clone this template locally: `beam create-app openllama`
|
||||||
|
4. Spin up a GPU environment to run inference: `beam start app.py`
|
||||||
|
5. Deploy the app as a web API: `beam deploy app.py`
|
43
__main__.py
Normal file
43
__main__.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
from bottle import request, response, post, template, HTTPResponse, run
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from os import getenv
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration
|
||||||
|
|
||||||
|
# Load .env before anything reads the environment (the original called
# load_dotenv() after the model download, so env vars that influence
# caching/auth would arrive too late).
load_dotenv()

# Comma-separated list of accepted API keys, e.g. API_KEYS=abc,def
raw_keys = getenv('API_KEYS')
if raw_keys is None:
    # Fail fast with a clear message instead of an AttributeError on None.
    raise RuntimeError("API_KEYS environment variable is not set")
api_keys = raw_keys.split(sep=",")

tokenizer = T5Tokenizer.from_pretrained(
    "google/flan-t5-xl",
    cache_dir = "model",
)

# cache_dir matches the tokenizer's (and the .gitignore'd `model/` dir)
# so the weights persist on disk instead of being re-downloaded.
model = T5ForConditionalGeneration.from_pretrained(
    "google/flan-t5-xl",
    cache_dir = "model",
)

generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
|
||||||
|
|
||||||
|
@post('/')
def gen():
    """Generate text for a JSON prompt.

    Expects an ``Authorization: X-Api-Key <key>`` header and a body of
    the form ``{"input": "<prompt>"}``; returns
    ``{"output": "<generated text>"}``.

    Raises:
        HTTPResponse: 401 for missing/malformed/invalid credentials,
            400 for a missing or malformed JSON body — instead of the
            unhandled-exception 500s the original produced.
    """
    auth = request.get_header('authorization')
    if auth is None:
        raise HTTPResponse(status=401)

    # Split on the first space only; a header with no (or extra) spaces
    # previously raised ValueError on unpack and surfaced as a 500.
    parts = auth.split(sep=" ", maxsplit=1)
    if len(parts) != 2:
        raise HTTPResponse(status=401)
    scheme, val = parts

    if scheme != 'X-Api-Key' or val not in api_keys:
        raise HTTPResponse(status=401)

    # request.json is None when the body is absent or not JSON.
    body = request.json
    if body is None or "input" not in body:
        raise HTTPResponse(status=400)

    # Renamed from `input` to avoid shadowing the builtin.
    prompt = body["input"]
    output = generator(prompt)

    print(prompt)
    print(output)
    return {"output": output[0]["generated_text"]}
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Entry point: serve the API on every interface, port 9010.
    run(port=9010, host='0.0.0.0')
|
7
requirements.txt
Normal file
7
requirements.txt
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
python-dotenv~=1.0.0
|
||||||
|
torch
|
||||||
|
accelerate
|
||||||
|
sentencepiece
|
||||||
|
protobuf
|
||||||
|
transformers
|
||||||
|
bottle
|
Loading…
Reference in New Issue
Block a user