Spaces:

atlasia
/

moul_lmemes

Sleeping

App Files Files Community

oumayma03 committed on Jul 9, 2025

Commit

bdeb60c

verified ·

1 Parent(s): ca5f9c2

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -13

app.py CHANGED Viewed

@@ -5,12 +5,13 @@ import openai
 import pandas as pd
 import spaces
 from langchain.docstore.document import Document
-from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 import gradio as gr
 import os
 from huggingface_hub import upload_file
 from datasets import load_dataset, Dataset
 class Meme(BaseModel):
     link: str = Field(..., description="The URL of the meme")
@@ -22,8 +23,7 @@ CSV_PATH = "memes_descriptions.csv" # Updated CSV_PATH to reflect the data direc
 API_KEY=os.getenv("OPENROUTER_API_KEY")
 HF_key = os.getenv("HUGGINGFACE_KEY")
-retriever = None
 try:
     client = openai.OpenAI(base_url="https://openrouter.ai/api/v1", api_key=API_KEY)
     model = outlines.from_openai(client, MODEL_NAME)
@@ -49,7 +49,11 @@ def load_data_and_create_vectorstore():
         embedding_model = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large-instruct")
         vectorstore = FAISS.from_documents(documents, embedding_model)
-        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
         print("✅ Data loaded and vectorstore created.")
     except FileNotFoundError:
         print(f"❌ Data file not found at {CSV_PATH}")
@@ -73,9 +77,9 @@ def ask_llm(question: str, docs: list) -> Meme:
     print(messages)
     try:
         meme = model(messages,Meme, stream=False)
-        #print(meme)
         meme = Meme.model_validate_json(meme)
-        #print(meme)
         return meme
     except Exception as e:
         return f"❌ LLM Error: {e}"
@@ -92,11 +96,7 @@ def query_memes(user_input: str):
     raw_url = meme.link
     description = meme.description
-    if "drive.google" in raw_url:
-        file_id = raw_url.split("/file/d/")[1].split("/")[0]
-        embed_url = f"https://drive.google.com/file/d/{file_id}/preview"
-    else:
-        embed_url = raw_url
     embed_html = f'''
     <div style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden; border-radius: 8px; box-shadow: 0 4px 10px rgba(0,0,0,0.2); margin-top: 1em;">
@@ -113,7 +113,7 @@ DATASET_REPO = "oumayma03/moul_lmemes_dataset"
 def upload_video_to_dataset(local_path: str) -> str:
     filename = os.path.basename(local_path)
-    remote_path = f"{filename}"
     uploaded_url = upload_file(
         path_or_fileobj=local_path,
         path_in_repo=remote_path,
@@ -147,11 +147,28 @@ def upload_meme(video_file, label):
         append_to_dataset_repo(video_url, label) # Pass label to the function
         # Reload vectorstore to include new meme
-        #load_data_and_create_vectorstore()
         return "✅ Meme uploaded successfully with label!"
     except Exception as e:
         return f"❌ Upload failed: {str(e)}"
 # --- GRADIO INTERFACE ---
 if __name__ == "__main__":
     if retriever is None or client is None:

 import pandas as pd
 import spaces
 from langchain.docstore.document import Document
+from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 import gradio as gr
 import os
 from huggingface_hub import upload_file
 from datasets import load_dataset, Dataset
+import re
 class Meme(BaseModel):
     link: str = Field(..., description="The URL of the meme")
 API_KEY=os.getenv("OPENROUTER_API_KEY")
 HF_key = os.getenv("HUGGINGFACE_KEY")
+retriever = None
 try:
     client = openai.OpenAI(base_url="https://openrouter.ai/api/v1", api_key=API_KEY)
     model = outlines.from_openai(client, MODEL_NAME)
         embedding_model = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large-instruct")
         vectorstore = FAISS.from_documents(documents, embedding_model)
+        try:
+            retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
+        except Exception as e:
+            print(f"❌ Error creating retriever: {e}")
+            retriever = None
         print("✅ Data loaded and vectorstore created.")
     except FileNotFoundError:
         print(f"❌ Data file not found at {CSV_PATH}")
     print(messages)
     try:
         meme = model(messages,Meme, stream=False)
+        print(meme)
         meme = Meme.model_validate_json(meme)
+        print(meme)
         return meme
     except Exception as e:
         return f"❌ LLM Error: {e}"
     raw_url = meme.link
     description = meme.description
+    embed_url = get_embed_url(raw_url)
     embed_html = f'''
     <div style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden; border-radius: 8px; box-shadow: 0 4px 10px rgba(0,0,0,0.2); margin-top: 1em;">
 def upload_video_to_dataset(local_path: str) -> str:
     filename = os.path.basename(local_path)
+    remote_path = f"data/{filename}"
     uploaded_url = upload_file(
         path_or_fileobj=local_path,
         path_in_repo=remote_path,
         append_to_dataset_repo(video_url, label) # Pass label to the function
         # Reload vectorstore to include new meme
+        load_data_and_create_vectorstore()
         return "✅ Meme uploaded successfully with label!"
     except Exception as e:
         return f"❌ Upload failed: {str(e)}"
def get_embed_url(raw_url: str) -> str:
    """Convert a meme link into its embeddable form.

    Google Drive file links are rewritten to their ``/preview`` URL, and
    YouTube ``watch`` / ``youtu.be`` links are rewritten to
    ``https://www.youtube.com/embed/<id>``. Any other URL is returned
    unchanged.

    Args:
        raw_url: The link to convert.

    Returns:
        The embed URL, or ``raw_url`` itself when no rule applies or no
        id can be extracted.
    """
    # Google Drive: the file id is the segment right after "/file/d/".
    if "drive.google" in raw_url and "/file/d/" in raw_url:
        tail = raw_url.split("/file/d/")[1]
        # Cut at the first "/", "?" or "#" so a link such as
        # ".../file/d/<id>?usp=sharing" does not leak its query string
        # into the id.
        file_id = re.split(r"[/?#]", tail, maxsplit=1)[0]
        if file_id:
            return f"https://drive.google.com/file/d/{file_id}/preview"

    # YouTube watch links: accept "v" anywhere in the query string, not
    # only as the first parameter. The id character class stops the capture
    # at "&" and "#", so trailing parameters and fragments are dropped.
    youtube_match = re.search(
        r"youtube\.com/watch\?(?:[^#\s]*?&)?v=([A-Za-z0-9_\-]+)", raw_url
    )
    if youtube_match is None:
        # Short links: the id is the first path segment after the host.
        youtube_match = re.search(r"youtu\.be/([A-Za-z0-9_\-]+)", raw_url)
    if youtube_match is not None:
        return f"https://www.youtube.com/embed/{youtube_match.group(1)}"

    # Default: return as is
    return raw_url
 # --- GRADIO INTERFACE ---
 if __name__ == "__main__":
     if retriever is None or client is None: