TalkToMe

Runtime error

App Files Files Community

vincentmin committed on Jun 20, 2023

Commit

aedbcbd

1 Parent(s): cb83f9d

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -29

app.py CHANGED Viewed

@@ -1,8 +1,12 @@
 import argparse
 import os
 import gradio as gr
-from text_generation import Client
 TITLE = """<h2 align="center">🚀 Falcon-Chat demo</h2>"""
 USER_NAME = "User"
@@ -13,9 +17,20 @@ RETRY_COMMAND = "/retry"
 STOP_STR = f"\n{USER_NAME}:"
 STOP_SUSPECT_LIST = [":", "\n", "User"]
-INFERENCE_ENDPOINT = os.environ.get("INFERENCE_ENDPOINT")
-INFERENCE_AUTH = os.environ.get("INFERENCE_AUTH")
 def chat_accordion():
     with gr.Accordion("Parameters", open=False):
@@ -48,7 +63,7 @@ def format_chat_prompt(message: str, chat_history, instructions: str) -> str:
     return prompt
-def chat(client: Client):
     with gr.Column(elem_id="chat_container"):
         with gr.Row():
             chatbot = gr.Chatbot(elem_id="chatbot")
@@ -106,34 +121,32 @@ def chat(client: Client):
         prompt = format_chat_prompt(message, chat_history, instructions)
         chat_history = chat_history + [[message, ""]]
-        stream = client.generate_stream(
             prompt,
-            do_sample=True,
-            max_new_tokens=1024,
-            stop_sequences=[STOP_STR, "<|endoftext|>"],
             temperature=temperature,
             top_p=top_p,
         )
-        acc_text = ""
-        for idx, response in enumerate(stream):
-            text_token = response.token.text
-            if response.details:
-                return
-            if text_token in STOP_SUSPECT_LIST:
-                acc_text += text_token
-                continue
-            if idx == 0 and text_token.startswith(" "):
-                text_token = text_token[1:]
-            acc_text += text_token
-            last_turn = list(chat_history.pop(-1))
-            last_turn[-1] += acc_text
-            chat_history = chat_history + [last_turn]
-            yield chat_history
-            acc_text = ""
     def delete_last_turn(chat_history):
         if chat_history:
@@ -163,7 +176,7 @@ def chat(client: Client):
     clear_chat_button.click(clear_chat, [], chatbot)
-def get_demo(client: Client):
     with gr.Blocks(
         # css=None
         # css="""#chat_container {width: 700px; margin-left: auto; margin-right: auto;}
@@ -195,7 +208,7 @@ def get_demo(client: Client):
                     """
                 )
-        chat(client)
     return demo
@@ -209,7 +222,6 @@ if __name__ == "__main__":
         default=INFERENCE_ENDPOINT,
     )
     args = parser.parse_args()
-    client = Client(args.addr, headers={"Authorization": f"Basic {INFERENCE_AUTH}"})
-    demo = get_demo(client)
     demo.queue(max_size=128, concurrency_count=16)
     demo.launch()

 import argparse
 import os
+import requests
 import gradio as gr
+MODEL = "HuggingFaceH4/starchat-beta"
+API_URL = f"https://api-inference.huggingface.co/models/{MODEL}"
+HEADERS = {"Authorization": f"Bearer {os.environ['HUB_TOKEN']}"}
 TITLE = """<h2 align="center">🚀 Falcon-Chat demo</h2>"""
 USER_NAME = "User"
 STOP_STR = f"\n{USER_NAME}:"
 STOP_SUSPECT_LIST = [":", "\n", "User"]
def run_model(prompt, temperature, top_p):
    """Request a sampled completion for *prompt* from the hosted model.

    Sends a JSON POST to ``API_URL`` using ``HEADERS`` and returns the
    ``generated_text`` field of the first element of the JSON response.

    Args:
        prompt: Fully formatted prompt text, sent as ``inputs``.
        temperature: Sampling temperature forwarded to the endpoint.
        top_p: Nucleus-sampling threshold forwarded to the endpoint.

    Returns:
        The ``generated_text`` value of the first returned generation.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If the endpoint does not answer in time.
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 128,
            "do_sample": True,
            "temperature": temperature,
            "top_p": top_p,
        },
    }
    # Without a timeout a stalled endpoint would block this worker forever.
    response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=120)
    # Fail loudly on HTTP errors instead of letting an error body (a dict,
    # not a list) blow up below as an opaque ``KeyError: 0``.
    response.raise_for_status()
    return response.json()[0]["generated_text"]
def get_stream(string: str):
    """Return an ``enumerate`` over the single-space-separated pieces of *string*.

    The split is on a literal ``" "``, so consecutive spaces produce empty
    pieces and an empty input yields one empty piece. The separating spaces
    themselves are not part of the yielded pieces.
    """
    # enumerate() accepts the list directly; no explicit iter() is needed.
    return enumerate(string.split(" "))
 def chat_accordion():
     with gr.Accordion("Parameters", open=False):
     return prompt
+def chat():
     with gr.Column(elem_id="chat_container"):
         with gr.Row():
             chatbot = gr.Chatbot(elem_id="chatbot")
         prompt = format_chat_prompt(message, chat_history, instructions)
         chat_history = chat_history + [[message, ""]]
+        model_output = run_model(
             prompt,
             temperature=temperature,
             top_p=top_p,
         )
+        yield model_output
+        # acc_text = ""
+        # for idx, response in enumerate(stream):
+        #     text_token = response.token.text
+        #     if response.details:
+        #         return
+        #     if text_token in STOP_SUSPECT_LIST:
+        #         acc_text += text_token
+        #         continue
+        #     if idx == 0 and text_token.startswith(" "):
+        #         text_token = text_token[1:]
+        #     acc_text += text_token
+        #     last_turn = list(chat_history.pop(-1))
+        #     last_turn[-1] += acc_text
+        #     chat_history = chat_history + [last_turn]
+        #     yield chat_history
+        #     acc_text = ""
     def delete_last_turn(chat_history):
         if chat_history:
     clear_chat_button.click(clear_chat, [], chatbot)
+def get_demo():
     with gr.Blocks(
         # css=None
         # css="""#chat_container {width: 700px; margin-left: auto; margin-right: auto;}
                     """
                 )
+        chat()
     return demo
         default=INFERENCE_ENDPOINT,
     )
     args = parser.parse_args()
+    demo = get_demo()
     demo.queue(max_size=128, concurrency_count=16)
     demo.launch()