oumayma03 committed on
Commit
bdeb60c
·
verified ·
1 Parent(s): ca5f9c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -13
app.py CHANGED
@@ -5,12 +5,13 @@ import openai
5
  import pandas as pd
6
  import spaces
7
  from langchain.docstore.document import Document
8
- from langchain_huggingface import HuggingFaceEmbeddings
9
  from langchain_community.vectorstores import FAISS
10
  import gradio as gr
11
  import os
12
  from huggingface_hub import upload_file
13
  from datasets import load_dataset, Dataset
 
14
 
15
  class Meme(BaseModel):
16
  link: str = Field(..., description="The URL of the meme")
@@ -22,8 +23,7 @@ CSV_PATH = "memes_descriptions.csv" # Updated CSV_PATH to reflect the data direc
22
 
23
  API_KEY=os.getenv("OPENROUTER_API_KEY")
24
  HF_key = os.getenv("HUGGINGFACE_KEY")
25
-
26
- retriever = None
27
  try:
28
  client = openai.OpenAI(base_url="https://openrouter.ai/api/v1", api_key=API_KEY)
29
  model = outlines.from_openai(client, MODEL_NAME)
@@ -49,7 +49,11 @@ def load_data_and_create_vectorstore():
49
 
50
  embedding_model = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large-instruct")
51
  vectorstore = FAISS.from_documents(documents, embedding_model)
52
- retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
 
 
 
 
53
  print("βœ… Data loaded and vectorstore created.")
54
  except FileNotFoundError:
55
  print(f"❌ Data file not found at {CSV_PATH}")
@@ -73,9 +77,9 @@ def ask_llm(question: str, docs: list) -> Meme:
73
  print(messages)
74
  try:
75
  meme = model(messages,Meme, stream=False)
76
- #print(meme)
77
  meme = Meme.model_validate_json(meme)
78
- #print(meme)
79
  return meme
80
  except Exception as e:
81
  return f"❌ LLM Error: {e}"
@@ -92,11 +96,7 @@ def query_memes(user_input: str):
92
  raw_url = meme.link
93
  description = meme.description
94
 
95
- if "drive.google" in raw_url:
96
- file_id = raw_url.split("/file/d/")[1].split("/")[0]
97
- embed_url = f"https://drive.google.com/file/d/{file_id}/preview"
98
- else:
99
- embed_url = raw_url
100
 
101
  embed_html = f'''
102
  <div style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden; border-radius: 8px; box-shadow: 0 4px 10px rgba(0,0,0,0.2); margin-top: 1em;">
@@ -113,7 +113,7 @@ DATASET_REPO = "oumayma03/moul_lmemes_dataset"
113
 
114
  def upload_video_to_dataset(local_path: str) -> str:
115
  filename = os.path.basename(local_path)
116
- remote_path = f"{filename}"
117
  uploaded_url = upload_file(
118
  path_or_fileobj=local_path,
119
  path_in_repo=remote_path,
@@ -147,11 +147,28 @@ def upload_meme(video_file, label):
147
  append_to_dataset_repo(video_url, label) # Pass label to the function
148
 
149
  # Reload vectorstore to include new meme
150
- #load_data_and_create_vectorstore()
151
 
152
  return "βœ… Meme uploaded successfully with label!"
153
  except Exception as e:
154
  return f"❌ Upload failed: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  # --- GRADIO INTERFACE ---
156
  if __name__ == "__main__":
157
  if retriever is None or client is None:
 
5
  import pandas as pd
6
  import spaces
7
  from langchain.docstore.document import Document
8
+ from langchain_community.embeddings import HuggingFaceEmbeddings
9
  from langchain_community.vectorstores import FAISS
10
  import gradio as gr
11
  import os
12
  from huggingface_hub import upload_file
13
  from datasets import load_dataset, Dataset
14
+ import re
15
 
16
  class Meme(BaseModel):
17
  link: str = Field(..., description="The URL of the meme")
 
23
 
24
  API_KEY=os.getenv("OPENROUTER_API_KEY")
25
  HF_key = os.getenv("HUGGINGFACE_KEY")
26
+ retriever = None
 
27
  try:
28
  client = openai.OpenAI(base_url="https://openrouter.ai/api/v1", api_key=API_KEY)
29
  model = outlines.from_openai(client, MODEL_NAME)
 
49
 
50
  embedding_model = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large-instruct")
51
  vectorstore = FAISS.from_documents(documents, embedding_model)
52
+ try:
53
+ retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
54
+ except Exception as e:
55
+ print(f"❌ Error creating retriever: {e}")
56
+ retriever = None
57
  print("βœ… Data loaded and vectorstore created.")
58
  except FileNotFoundError:
59
  print(f"❌ Data file not found at {CSV_PATH}")
 
77
  print(messages)
78
  try:
79
  meme = model(messages,Meme, stream=False)
80
+ print(meme)
81
  meme = Meme.model_validate_json(meme)
82
+ print(meme)
83
  return meme
84
  except Exception as e:
85
  return f"❌ LLM Error: {e}"
 
96
  raw_url = meme.link
97
  description = meme.description
98
 
99
+ embed_url = get_embed_url(raw_url)
 
 
 
 
100
 
101
  embed_html = f'''
102
  <div style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden; border-radius: 8px; box-shadow: 0 4px 10px rgba(0,0,0,0.2); margin-top: 1em;">
 
113
 
114
  def upload_video_to_dataset(local_path: str) -> str:
115
  filename = os.path.basename(local_path)
116
+ remote_path = f"data/{filename}"
117
  uploaded_url = upload_file(
118
  path_or_fileobj=local_path,
119
  path_in_repo=remote_path,
 
147
  append_to_dataset_repo(video_url, label) # Pass label to the function
148
 
149
  # Reload vectorstore to include new meme
150
+ load_data_and_create_vectorstore()
151
 
152
  return "βœ… Meme uploaded successfully with label!"
153
  except Exception as e:
154
  return f"❌ Upload failed: {str(e)}"
155
+
156
def get_embed_url(raw_url: str) -> str:
    """Return an iframe-embeddable URL for a meme link.

    Recognised link shapes are rewritten to their embed form:

    * Google Drive file links (``.../file/d/<id>/...``) become
      ``https://drive.google.com/file/d/<id>/preview``.
    * YouTube watch links (``youtube.com/watch?...v=<id>``) and short links
      (``youtu.be/<id>``) become ``https://www.youtube.com/embed/<id>``.

    Any other value (including an empty string) is returned unchanged.

    Args:
        raw_url: The link as stored with the meme.

    Returns:
        The embed URL, or ``raw_url`` itself when no rule applies.
    """
    if not raw_url:
        return raw_url

    # Google Drive: the id is the path segment right after "/file/d/".
    # Cut at "/", "?" or "#" so links without a trailing "/view" segment
    # (e.g. ".../file/d/<id>?usp=sharing") do not leak the query into the id.
    if "drive.google" in raw_url and "/file/d/" in raw_url:
        tail = raw_url.split("/file/d/", 1)[1]
        file_id = re.split(r"[/?#]", tail, maxsplit=1)[0]
        if file_id:
            return f"https://drive.google.com/file/d/{file_id}/preview"
        return raw_url

    # YouTube watch page: accept "v=" anywhere in the query string, and stop
    # the id at the next "&" or "#" so extra parameters/fragments are dropped.
    watch_match = re.search(
        r"youtube\.com/watch\?(?:[^#]*?&)?v=([^&#]+)", raw_url
    )
    if watch_match:
        return f"https://www.youtube.com/embed/{watch_match.group(1)}"

    # YouTube short link: the id is the first path segment after the host.
    short_match = re.search(r"youtu\.be/([^/?#&]+)", raw_url)
    if short_match:
        return f"https://www.youtube.com/embed/{short_match.group(1)}"

    # Default: return as is.
    return raw_url
171
+
172
  # --- GRADIO INTERFACE ---
173
  if __name__ == "__main__":
174
  if retriever is None or client is None: