Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,35 +1,28 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from transformers import AutoTokenizer,
|
| 3 |
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
|
| 4 |
-
from llama_index.llms import
|
| 5 |
from llama_index.embeddings import HuggingFaceEmbedding
|
| 6 |
-
import
|
| 7 |
|
| 8 |
-
# Load
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
indicbart_model = AutoModelForSeq2SeqLM.from_pretrained(indicbart_model_name)
|
| 12 |
|
| 13 |
-
# Load
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
deepseek_model = AutoModelForCausalLM.from_pretrained(deepseek_model_name, trust_remote_code=True)
|
| 17 |
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
-
llm =
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
generate_kwargs={"temperature": 0.7, "top_p": 0.9},
|
| 26 |
-
device_map="auto"
|
| 27 |
-
)
|
| 28 |
-
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embedding_model)
|
| 29 |
-
documents = SimpleDirectoryReader("Tanjore taining script.docx").load_data()
|
| 30 |
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
|
| 31 |
-
query_engine = index.as_query_engine()
|
| 32 |
|
|
|
|
| 33 |
def restore_text(input_text, task_type):
|
| 34 |
prefix_map = {
|
| 35 |
"Restore & Correct Tamil Text": "restore: ",
|
|
@@ -43,23 +36,38 @@ def restore_text(input_text, task_type):
|
|
| 43 |
decoded_output = indicbart_tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
| 44 |
return decoded_output[0]
|
| 45 |
|
| 46 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
response = query_engine.query(query)
|
| 48 |
return str(response)
|
| 49 |
|
|
|
|
| 50 |
with gr.Blocks() as demo:
|
| 51 |
-
gr.Markdown("
|
| 52 |
-
|
|
|
|
| 53 |
input_text = gr.Textbox(label="Input Tamil Text", lines=8, placeholder="Enter ancient Tamil text here...")
|
| 54 |
task_type = gr.Radio(choices=["Restore & Correct Tamil Text", "Summarize in Tamil", "Translate to English"], label="Select Task")
|
| 55 |
output_text = gr.Textbox(label="Output")
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
if __name__ == "__main__":
|
| 65 |
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms import DeepSeek
from llama_index.embeddings import HuggingFaceEmbedding
import os

# Model/resource identifiers hoisted to constants so each id is written once.
DEEPSEEK_MODEL_NAME = "deepseek-ai/DeepSeek-R1"
INDICBART_MODEL_NAME = "ai4bharat/IndicBART"
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
DOCUMENTS_DIR = "data"

# Load DeepSeek-R1 (used by the local chat tab).
# NOTE(review): trust_remote_code=True executes code shipped in the model repo;
# acceptable only because the repo id is pinned above — never make it user-supplied.
deepseek_tokenizer = AutoTokenizer.from_pretrained(DEEPSEEK_MODEL_NAME, trust_remote_code=True)
deepseek_model = AutoModelForCausalLM.from_pretrained(DEEPSEEK_MODEL_NAME, trust_remote_code=True)

# Load IndicBART (used for the Tamil restore / summarize / translate tasks).
indicbart_tokenizer = AutoTokenizer.from_pretrained(INDICBART_MODEL_NAME)
indicbart_model = AutoModelForSeq2SeqLM.from_pretrained(INDICBART_MODEL_NAME)

# Initialize LlamaIndex components for retrieval-augmented document queries.
embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)
# NOTE(review): os.getenv returns None when DEEPSEEK_API_KEY is unset, which
# only surfaces as a failure at query time — confirm the secret is configured.
llm = DeepSeek(model="deepseek-reasoner", api_key=os.getenv("DEEPSEEK_API_KEY"))
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

# Build index from documents in 'data' directory
documents = SimpleDirectoryReader(DOCUMENTS_DIR).load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
|
|
|
|
| 24 |
|
| 25 |
+
# Define functions for each task
|
| 26 |
def restore_text(input_text, task_type):
|
| 27 |
prefix_map = {
|
| 28 |
"Restore & Correct Tamil Text": "restore: ",
|
|
|
|
| 36 |
decoded_output = indicbart_tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
| 37 |
return decoded_output[0]
|
| 38 |
|
| 39 |
+
def deepseek_chat(message):
    """Generate a reply to *message* with the locally loaded DeepSeek-R1 model."""
    eos = deepseek_tokenizer.eos_token
    # Terminate the prompt with EOS, matching how the model was queried originally.
    prompt_ids = deepseek_tokenizer.encode(message + eos, return_tensors="pt")
    generated = deepseek_model.generate(
        prompt_ids,
        max_length=1024,
        pad_token_id=deepseek_tokenizer.eos_token_id,
    )
    reply = deepseek_tokenizer.decode(generated[0], skip_special_tokens=True)
    return reply
|
| 43 |
+
|
| 44 |
+
def query_documents(query):
    """Answer *query* against the vector index built at module load."""
    # A fresh query engine per call, exactly as the original did.
    engine = index.as_query_engine()
    answer = engine.query(query)
    return str(answer)
|
| 48 |
|
| 49 |
+
# Gradio Interface: three tabs, each wired to one of the module-level task functions.
with gr.Blocks() as demo:
    gr.Markdown("## 🕉️ Ancient Tamil Literature Expert AI")

    with gr.Tab("IndicBART Tasks"):
        # Tamil restore / summarize / translate, backed by restore_text.
        indic_source = gr.Textbox(label="Input Tamil Text", lines=8, placeholder="Enter ancient Tamil text here...")
        indic_task = gr.Radio(choices=["Restore & Correct Tamil Text", "Summarize in Tamil", "Translate to English"], label="Select Task")
        indic_result = gr.Textbox(label="Output")
        indic_run = gr.Button("Submit")
        indic_run.click(fn=restore_text, inputs=[indic_source, indic_task], outputs=indic_result)

    with gr.Tab("DeepSeek-R1 Chat"):
        # Free-form chat against the locally loaded DeepSeek model.
        chat_prompt = gr.Textbox(label="Enter your message")
        chat_reply = gr.Textbox(label="DeepSeek-R1 Response")
        chat_send = gr.Button("Send")
        chat_send.click(fn=deepseek_chat, inputs=chat_prompt, outputs=chat_reply)

    with gr.Tab("Document Query"):
        # Retrieval-augmented lookup over the module-level vector index.
        doc_question = gr.Textbox(label="Enter your query")
        doc_answer = gr.Textbox(label="Query Response")
        doc_search = gr.Button("Search")
        doc_search.click(fn=query_documents, inputs=doc_question, outputs=doc_answer)

if __name__ == "__main__":
    demo.launch()
|