lol040604lol committed on
Commit
a5fec08
·
verified ·
1 Parent(s): 2ea069c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -34
app.py CHANGED
@@ -1,35 +1,28 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, pipeline
3
  from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
4
- from llama_index.llms import HuggingFaceLLM
5
  from llama_index.embeddings import HuggingFaceEmbedding
6
- import torch
7
 
8
- # Load IndicBART model
9
- indicbart_model_name = "ai4bharat/IndicBART"
10
- indicbart_tokenizer = AutoTokenizer.from_pretrained(indicbart_model_name)
11
- indicbart_model = AutoModelForSeq2SeqLM.from_pretrained(indicbart_model_name)
12
 
13
- # Load DeepSeek-R1 model
14
- deepseek_model_name = "deepseek-ai/DeepSeek-R1"
15
- deepseek_tokenizer = AutoTokenizer.from_pretrained(deepseek_model_name, trust_remote_code=True)
16
- deepseek_model = AutoModelForCausalLM.from_pretrained(deepseek_model_name, trust_remote_code=True)
17
 
18
- # Setup LlamaIndex with HuggingFace models
19
- embedding_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
20
- llm = HuggingFaceLLM(
21
- model=deepseek_model,
22
- tokenizer=deepseek_tokenizer,
23
- context_window=4096,
24
- max_new_tokens=512,
25
- generate_kwargs={"temperature": 0.7, "top_p": 0.9},
26
- device_map="auto"
27
- )
28
- service_context = ServiceContext.from_defaults(llm=llm, embed_model=embedding_model)
29
- documents = SimpleDirectoryReader("Tanjore taining script.docx").load_data()
30
  index = VectorStoreIndex.from_documents(documents, service_context=service_context)
31
- query_engine = index.as_query_engine()
32
 
 
33
  def restore_text(input_text, task_type):
34
  prefix_map = {
35
  "Restore & Correct Tamil Text": "restore: ",
@@ -43,23 +36,38 @@ def restore_text(input_text, task_type):
43
  decoded_output = indicbart_tokenizer.batch_decode(outputs, skip_special_tokens=True)
44
  return decoded_output[0]
45
 
46
- def answer_query(query):
 
 
 
 
 
 
47
  response = query_engine.query(query)
48
  return str(response)
49
 
 
50
  with gr.Blocks() as demo:
51
- gr.Markdown("# 🕉️ Ancient Tamil Text Restorer")
52
- with gr.Tab("Restore & Translate"):
 
53
  input_text = gr.Textbox(label="Input Tamil Text", lines=8, placeholder="Enter ancient Tamil text here...")
54
  task_type = gr.Radio(choices=["Restore & Correct Tamil Text", "Summarize in Tamil", "Translate to English"], label="Select Task")
55
  output_text = gr.Textbox(label="Output")
56
- restore_button = gr.Button("Process")
57
- restore_button.click(fn=restore_text, inputs=[input_text, task_type], outputs=output_text)
58
- with gr.Tab("Ask Historical Questions"):
59
- query_input = gr.Textbox(label="Ask a question about the uploaded documents")
60
- query_output = gr.Textbox(label="Answer")
61
- query_button = gr.Button("Ask")
62
- query_button.click(fn=answer_query, inputs=query_input, outputs=query_output)
 
 
 
 
 
 
 
63
 
64
  if __name__ == "__main__":
65
  demo.launch()
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
3
  from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
4
+ from llama_index.llms import DeepSeek
5
  from llama_index.embeddings import HuggingFaceEmbedding
6
+ import os
7
 
8
# Load DeepSeek-R1 as a local Hugging Face checkpoint for the chat tab.
# NOTE(review): DeepSeek-R1 is an extremely large model — confirm this
# deployment actually has the memory/disk to load it with from_pretrained.
deepseek_tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1", trust_remote_code=True)
deepseek_model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1", trust_remote_code=True)

# Load IndicBART (seq2seq) for Tamil restoration / summarisation / translation.
indicbart_tokenizer = AutoTokenizer.from_pretrained("ai4bharat/IndicBART")
indicbart_model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/IndicBART")

# Initialize LlamaIndex components: local sentence-transformers embeddings
# plus a DeepSeek LLM for query synthesis.
# NOTE(review): this LLM uses the hosted DeepSeek API ("deepseek-reasoner"),
# NOT the local weights loaded above — requires DEEPSEEK_API_KEY to be set
# in the environment; confirm the duplication (local + API) is intentional.
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
llm = DeepSeek(model="deepseek-reasoner", api_key=os.getenv("DEEPSEEK_API_KEY"))
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

# Build index from documents in 'data' directory (read once at startup;
# presumably the Tanjore training documents — verify the directory exists).
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
 
24
 
25
+ # Define functions for each task
26
  def restore_text(input_text, task_type):
27
  prefix_map = {
28
  "Restore & Correct Tamil Text": "restore: ",
 
36
  decoded_output = indicbart_tokenizer.batch_decode(outputs, skip_special_tokens=True)
37
  return decoded_output[0]
38
 
39
def deepseek_chat(message):
    """Generate a chat response from the locally loaded DeepSeek-R1 model.

    Args:
        message: The user's input text.

    Returns:
        The model's generated continuation as a string. Only newly
        generated tokens are decoded — the echoed prompt is stripped.
    """
    # Tokenize via __call__ so we also get an attention_mask; the previous
    # encode() call passed none, which `generate` warns about and can
    # mishandle when padding. (Appending eos_token to the prompt was a
    # DialoGPT-style idiom and is not appropriate for this model.)
    inputs = deepseek_tokenizer(message, return_tensors="pt")
    # max_new_tokens bounds only the continuation; the old max_length=1024
    # counted prompt tokens too, so long prompts got little or no output.
    outputs = deepseek_model.generate(
        **inputs,
        max_new_tokens=1024,
        pad_token_id=deepseek_tokenizer.eos_token_id,
    )
    # Decode only the tokens generated after the prompt.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return deepseek_tokenizer.decode(new_tokens, skip_special_tokens=True)
43
+
44
def query_documents(query):
    """Answer *query* with retrieval-augmented generation over the index.

    A fresh query engine is created on every call (matching the original
    behaviour); the response object is stringified for display in Gradio.
    """
    return str(index.as_query_engine().query(query))
48
 
49
+ # Gradio Interface
50
# ---- Gradio UI -----------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🕉️ Ancient Tamil Literature Expert AI")

    # Tab 1: IndicBART seq2seq tasks (restore / summarise / translate).
    with gr.Tab("IndicBART Tasks"):
        tamil_in = gr.Textbox(label="Input Tamil Text", lines=8, placeholder="Enter ancient Tamil text here...")
        tamil_task = gr.Radio(choices=["Restore & Correct Tamil Text", "Summarize in Tamil", "Translate to English"], label="Select Task")
        tamil_out = gr.Textbox(label="Output")
        tamil_btn = gr.Button("Submit")
        tamil_btn.click(fn=restore_text, inputs=[tamil_in, tamil_task], outputs=tamil_out)

    # Tab 2: free-form chat against the locally loaded DeepSeek-R1 weights.
    with gr.Tab("DeepSeek-R1 Chat"):
        chat_in = gr.Textbox(label="Enter your message")
        chat_out = gr.Textbox(label="DeepSeek-R1 Response")
        chat_btn = gr.Button("Send")
        chat_btn.click(fn=deepseek_chat, inputs=chat_in, outputs=chat_out)

    # Tab 3: retrieval-augmented Q&A over the indexed documents.
    with gr.Tab("Document Query"):
        doc_in = gr.Textbox(label="Enter your query")
        doc_out = gr.Textbox(label="Query Response")
        doc_btn = gr.Button("Search")
        doc_btn.click(fn=query_documents, inputs=doc_in, outputs=doc_out)

if __name__ == "__main__":
    demo.launch()