Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,35 +1,28 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from transformers import AutoTokenizer,
|
| 3 |
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
|
| 4 |
-
from llama_index.llms import
|
| 5 |
from llama_index.embeddings import HuggingFaceEmbedding
|
| 6 |
-
import
|
| 7 |
|
| 8 |
-
# Load
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
indicbart_model = AutoModelForSeq2SeqLM.from_pretrained(indicbart_model_name)
|
| 12 |
|
| 13 |
-
# Load
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
deepseek_model = AutoModelForCausalLM.from_pretrained(deepseek_model_name, trust_remote_code=True)
|
| 17 |
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
-
llm =
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
generate_kwargs={"temperature": 0.7, "top_p": 0.9},
|
| 26 |
-
device_map="auto"
|
| 27 |
-
)
|
| 28 |
-
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embedding_model)
|
| 29 |
-
documents = SimpleDirectoryReader("Tanjore taining script.docx").load_data()
|
| 30 |
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
|
| 31 |
-
query_engine = index.as_query_engine()
|
| 32 |
|
|
|
|
| 33 |
def restore_text(input_text, task_type):
|
| 34 |
prefix_map = {
|
| 35 |
"Restore & Correct Tamil Text": "restore: ",
|
|
@@ -43,23 +36,38 @@ def restore_text(input_text, task_type):
|
|
| 43 |
decoded_output = indicbart_tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
| 44 |
return decoded_output[0]
|
| 45 |
|
| 46 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
response = query_engine.query(query)
|
| 48 |
return str(response)
|
| 49 |
|
|
|
|
| 50 |
with gr.Blocks() as demo:
|
| 51 |
-
gr.Markdown("
|
| 52 |
-
|
|
|
|
| 53 |
input_text = gr.Textbox(label="Input Tamil Text", lines=8, placeholder="Enter ancient Tamil text here...")
|
| 54 |
task_type = gr.Radio(choices=["Restore & Correct Tamil Text", "Summarize in Tamil", "Translate to English"], label="Select Task")
|
| 55 |
output_text = gr.Textbox(label="Output")
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
if __name__ == "__main__":
|
| 65 |
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms import DeepSeek
from llama_index.embeddings import HuggingFaceEmbedding
import os

# Model/resource identifiers hoisted to constants so each id is written once.
DEEPSEEK_MODEL_NAME = "deepseek-ai/DeepSeek-R1"
INDICBART_MODEL_NAME = "ai4bharat/IndicBART"
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
DOCUMENTS_DIR = "data"

# Load DeepSeek-R1 (used by the local chat tab).
# NOTE(review): trust_remote_code=True executes code shipped in the model repo;
# acceptable only because the repo id is pinned above — never make it user-supplied.
deepseek_tokenizer = AutoTokenizer.from_pretrained(DEEPSEEK_MODEL_NAME, trust_remote_code=True)
deepseek_model = AutoModelForCausalLM.from_pretrained(DEEPSEEK_MODEL_NAME, trust_remote_code=True)

# Load IndicBART (used for the Tamil restore / summarize / translate tasks).
indicbart_tokenizer = AutoTokenizer.from_pretrained(INDICBART_MODEL_NAME)
indicbart_model = AutoModelForSeq2SeqLM.from_pretrained(INDICBART_MODEL_NAME)

# Initialize LlamaIndex components for retrieval-augmented document queries.
embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)
# NOTE(review): os.getenv returns None when DEEPSEEK_API_KEY is unset, which
# only surfaces as a failure at query time — confirm the secret is configured.
llm = DeepSeek(model="deepseek-reasoner", api_key=os.getenv("DEEPSEEK_API_KEY"))
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

# Build index from documents in 'data' directory
documents = SimpleDirectoryReader(DOCUMENTS_DIR).load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
|
|
|
|
| 24 |
|
| 25 |
+
# Define functions for each task
|
| 26 |
def restore_text(input_text, task_type):
|
| 27 |
prefix_map = {
|
| 28 |
"Restore & Correct Tamil Text": "restore: ",
|
|
|
|
| 36 |
decoded_output = indicbart_tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
| 37 |
return decoded_output[0]
|
| 38 |
|
| 39 |
+
def deepseek_chat(message):
    """Generate a reply to *message* with the locally loaded DeepSeek-R1 model."""
    eos = deepseek_tokenizer.eos_token
    # Terminate the prompt with EOS, matching how the model was queried originally.
    prompt_ids = deepseek_tokenizer.encode(message + eos, return_tensors="pt")
    generated = deepseek_model.generate(
        prompt_ids,
        max_length=1024,
        pad_token_id=deepseek_tokenizer.eos_token_id,
    )
    reply = deepseek_tokenizer.decode(generated[0], skip_special_tokens=True)
    return reply
|
| 43 |
+
|
| 44 |
+
def query_documents(query):
    """Answer *query* against the vector index built at module load."""
    # A fresh query engine per call, exactly as the original did.
    engine = index.as_query_engine()
    answer = engine.query(query)
    return str(answer)
|
| 48 |
|
| 49 |
+
# Gradio Interface: three tabs, each wired to one of the module-level task functions.
with gr.Blocks() as demo:
    gr.Markdown("## 🕉️ Ancient Tamil Literature Expert AI")

    with gr.Tab("IndicBART Tasks"):
        # Tamil restore / summarize / translate, backed by restore_text.
        indic_source = gr.Textbox(label="Input Tamil Text", lines=8, placeholder="Enter ancient Tamil text here...")
        indic_task = gr.Radio(choices=["Restore & Correct Tamil Text", "Summarize in Tamil", "Translate to English"], label="Select Task")
        indic_result = gr.Textbox(label="Output")
        indic_run = gr.Button("Submit")
        indic_run.click(fn=restore_text, inputs=[indic_source, indic_task], outputs=indic_result)

    with gr.Tab("DeepSeek-R1 Chat"):
        # Free-form chat against the locally loaded DeepSeek model.
        chat_prompt = gr.Textbox(label="Enter your message")
        chat_reply = gr.Textbox(label="DeepSeek-R1 Response")
        chat_send = gr.Button("Send")
        chat_send.click(fn=deepseek_chat, inputs=chat_prompt, outputs=chat_reply)

    with gr.Tab("Document Query"):
        # Retrieval-augmented lookup over the module-level vector index.
        doc_question = gr.Textbox(label="Enter your query")
        doc_answer = gr.Textbox(label="Query Response")
        doc_search = gr.Button("Search")
        doc_search.click(fn=query_documents, inputs=doc_question, outputs=doc_answer)

if __name__ == "__main__":
    demo.launch()
|