oumayma03 committed on
Commit
a7bcc23
·
verified ·
1 Parent(s): 75623b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -1
app.py CHANGED
@@ -1 +1,113 @@
1
- # Main application logic will go here
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Correcting the CSV_PATH to reflect the actual location after copying
2
+ CSV_PATH = "memes.csv"
3
+
4
+ # Re-running the app logic with the corrected path
5
+ import os
6
+ import sys
7
+ import pandas as pd
8
+ import gradio as gr
9
+ from openai import OpenAI
10
+ from langchain.docstore.document import Document
11
+ from langchain.embeddings import HuggingFaceEmbeddings
12
+ from langchain.vectorstores import FAISS
13
+
14
+ # --- CONFIG ---
15
+ # CSV_PATH = "/data/Memes and descriptions - Sheet1.csv" # Corrected above
16
+ MODEL_NAME = "qwen/qwen3-32b:free"
17
+
18
+ # Use environment variable for API key in Hugging Face Spaces
19
+ API_KEY = os.getenv("OPENROUTER_API_KEY")
20
+ #from google.colab import userdata
21
+ #API_KEY = userdata.get('open_router')
22
+ if not API_KEY:
23
+ # Fallback for local testing if needed, but prefer env var
24
+ # sys.exit("❌ Missing OpenRouter/OpenAI API key.")
25
+ print("⚠️ OPENROUTER_API_KEY not set. Using dummy key.")
26
+ API_KEY = "dummy_key"
27
+
28
+
29
+ try:
30
+ client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=API_KEY)
31
+ except Exception as e:
32
+ print(f"❌ Failed to initialize OpenRouter client: {e}")
33
+ client = None # Handle case where client initialization fails
34
+
35
+ # --- LOAD DATA ---
36
+ try:
37
+ df = pd.read_csv(CSV_PATH).fillna({"Description": "", "Link": ""})
38
+ documents = [
39
+ Document(
40
+ page_content=row["Description"],
41
+ metadata={"url": str(row["link"]).strip()}
42
+ )
43
+ for _, row in df.iterrows()
44
+ ]
45
+
46
+ # --- FAISS ---
47
+ embedding_model = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large-instruct")
48
+ vectorstore = FAISS.from_documents(documents, embedding_model)
49
+ retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
50
+
51
+ except FileNotFoundError:
52
+ print(f"❌ Data file not found at {CSV_PATH}")
53
+ documents = []
54
+ vectorstore = None
55
+ retriever = None
56
+ except Exception as e:
57
+ print(f"❌ Error loading data or creating vectorstore: {e}")
58
+ documents = []
59
+ vectorstore = None
60
+ retriever = None
61
+
62
+
# --- LLM ---
def ask_llm(question: str, docs: list) -> str:
    """Ask the OpenRouter-backed chat model to pick the best meme.

    Builds a numbered context block from the retrieved documents, sends it
    as the system prompt together with the user's question, and returns the
    model's reply. Any failure is reported as an error string rather than
    raised.
    """
    if client is None:
        return "❌ LLM client not initialized."

    # One numbered entry per retrieved meme.
    entries = []
    for idx, document in enumerate(docs, start=1):
        link = document.metadata.get('url', 'N/A')
        entries.append(
            f"Meme {idx}:\nDescription: {document.page_content}\nLink: {link}"
        )
    context = "\n\n".join(entries)

    chat_messages = [
        {
            "role": "system",
            "content": f"You're a meme expert. the user will say something and the goal is to find the accurate meme out of the following choices : \n{context}",
        },
        {"role": "user", "content": f"{question}"},
    ]

    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=chat_messages,
            extra_headers={"HTTP-Referer": "https://your-site.com", "X-Title": "MemeRAG"},
        )
    except Exception as err:
        return f"❌ LLM Error: {err}"
    return completion.choices[0].message.content
# --- MAIN QUERY ---
def query_memes(user_input: str):
    """Retrieve the top matching memes for *user_input* and format a reply.

    Returns one display string: the LLM's answer followed by the retrieved
    meme descriptions and their links. If retrieval failed to initialize,
    returns an error string instead.
    """
    if retriever is None:
        return "❌ RAG system not initialized due to errors."

    src_docs = retriever.invoke(user_input)
    answer = ask_llm(user_input, src_docs)

    # Accumulate parts in a list and join once — avoids the quadratic
    # repeated `+=` string concatenation of the original.
    parts = [f"πŸ’‘ Answer:\n{answer}\n\nπŸ“‚ Top Matching Memes:"]
    for i, doc in enumerate(src_docs, 1):
        raw = doc.metadata.get("url", "").strip()
        # Fall back to a Drive search when the stored link is not a URL.
        url = raw if raw.startswith("http") else f"https://drive.google.com/search?q={raw.replace(' ', '%20')}"
        parts.append(f"\n\n{i}. {doc.page_content}\n Link: {url}")

    return "".join(parts)
# --- GRADIO INTERFACE ---
if __name__ == "__main__":
    # Only launch the UI when both the retriever and the LLM client came up.
    ready = retriever is not None and client is not None
    if not ready:
        print("Gradio interface will not run due to RAG/LLM initialization errors.")
    else:
        demo = gr.Interface(
            fn=query_memes,
            inputs=gr.Textbox(label="Ask something about memes"),
            outputs=gr.Textbox(label="Results"),
            title="Memes lharba 🎬",
            description="Ask me to find the perfect meme!",
        )
        demo.launch()