lol040604lol committed (verified)
Commit 50a99ec · 1 Parent(s): 17e2154

Update app.py

Files changed (1):
  app.py  +30 -25
app.py CHANGED
@@ -1,36 +1,41 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import torch
 
-# Load IndicBART model
-model_name = "ai4bharat/IndicBART"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+# Load DeepSeek-R1 model
+tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1", trust_remote_code=True)
 
-def restore_text(input_text, task_type):
-    prefix_map = {
-        "Restore & Correct Tamil Text": "restore: ",
-        "Summarize in Tamil": "summarize: ",
-        "Translate to English": "translate Tamil to English: "
-    }
-    prefix = prefix_map.get(task_type, "restore: ")
+# Device config
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
 
-    input_text = prefix + input_text
-    inputs = tokenizer([input_text], return_tensors="pt", padding=True)
-    outputs = model.generate(**inputs, max_length=256, num_beams=4, early_stopping=True)
-    decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)
-    return decoded_output[0]
+def restore_tamil_text(prompt, uploaded_file=None):
+    context = ""
+
+    # If a file is uploaded, read its contents and use it for context
+    if uploaded_file:
+        file_bytes = uploaded_file.read()
+        context = file_bytes.decode("utf-8")
+        full_prompt = f"You are an expert in ancient Tamil. Use the document below to restore and expand the text.\n\nDocument:\n{context}\n\nTask:\n{prompt}"
+    else:
+        full_prompt = f"You are a Tamil literature historian. {prompt}"
+
+    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
+    outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.7)
+    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    return output_text.strip()
 
-# Gradio Interface
 iface = gr.Interface(
-    fn=restore_text,
+    fn=restore_tamil_text,
     inputs=[
-        gr.Textbox(label="Input Tamil Text", lines=8, placeholder="Enter ancient Tamil text here..."),
-        gr.Radio(choices=["Restore & Correct Tamil Text", "Summarize in Tamil", "Translate to English"],
-                 label="Select Task")
+        gr.Textbox(label="Your Prompt (e.g., Restore this ancient Tamil text...)", lines=6),
+        gr.File(label="Optional: Upload Ancient Tamil File (.txt)")
     ],
-    outputs=gr.Textbox(label="Output"),
-    title="🕉️ Ancient Tamil Text Restorer",
-    description="Restores, corrects, and translates 11-12th century Tamil texts using AI4Bharat's IndicBART model."
+    outputs=gr.Textbox(label="Restored Output"),
+    title="🧠 Ancient Tamil Literature AI Agent",
+    description="Hybrid AI using DeepSeek-R1 + optional file context. Restores and expands ancient Tamil literature using DeepSeek LLM."
 )
 
 if __name__ == "__main__":
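
Note (not part of the commit): as committed, the new app.py may be hard to run directly. The full deepseek-ai/DeepSeek-R1 checkpoint is far larger than typical Space hardware can load with a plain from_pretrained call, the pipeline import is unused, temperature=0.7 only takes effect when sampling is enabled in model.generate, and, depending on the installed Gradio version, gr.File hands the callback a file path (a string) rather than a readable object, so uploaded_file.read() can fail. A minimal sketch of the same flow under those assumptions, substituting a small distilled checkpoint (deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B, an illustrative choice, not what this commit uses) and Gradio's filepath-style file input:

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Assumption: a small distilled variant stands in for the full DeepSeek-R1 checkpoint.
MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def restore_tamil_text(prompt, uploaded_file=None):
    # With type="filepath", Gradio passes the upload as a path string (or None).
    if uploaded_file:
        with open(uploaded_file, "r", encoding="utf-8") as f:
            context = f.read()
        full_prompt = (
            "You are an expert in ancient Tamil. Use the document below to restore "
            f"and expand the text.\n\nDocument:\n{context}\n\nTask:\n{prompt}"
        )
    else:
        full_prompt = f"You are a Tamil literature historian. {prompt}"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,   # temperature is ignored under greedy decoding
        temperature=0.7,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

iface = gr.Interface(
    fn=restore_tamil_text,
    inputs=[
        gr.Textbox(label="Your Prompt (e.g., Restore this ancient Tamil text...)", lines=6),
        gr.File(label="Optional: Upload Ancient Tamil File (.txt)", type="filepath"),
    ],
    outputs=gr.Textbox(label="Restored Output"),
    title="🧠 Ancient Tamil Literature AI Agent",
    description="Hybrid AI using a DeepSeek distilled model + optional file context.",
)

if __name__ == "__main__":
    iface.launch()

If the full DeepSeek-R1 weights are the goal, calling a hosted inference endpoint from the Space instead of loading the model in-process is likely the more realistic setup.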