Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,12 +5,13 @@ import openai
|
|
| 5 |
import pandas as pd
|
| 6 |
import spaces
|
| 7 |
from langchain.docstore.document import Document
|
| 8 |
-
from
|
| 9 |
from langchain_community.vectorstores import FAISS
|
| 10 |
import gradio as gr
|
| 11 |
import os
|
| 12 |
from huggingface_hub import upload_file
|
| 13 |
from datasets import load_dataset, Dataset
|
|
|
|
| 14 |
|
| 15 |
class Meme(BaseModel):
|
| 16 |
link: str = Field(..., description="The URL of the meme")
|
|
@@ -22,8 +23,7 @@ CSV_PATH = "memes_descriptions.csv" # Updated CSV_PATH to reflect the data direc
|
|
| 22 |
|
| 23 |
API_KEY=os.getenv("OPENROUTER_API_KEY")
|
| 24 |
HF_key = os.getenv("HUGGINGFACE_KEY")
|
| 25 |
-
|
| 26 |
-
retriever = None
|
| 27 |
try:
|
| 28 |
client = openai.OpenAI(base_url="https://openrouter.ai/api/v1", api_key=API_KEY)
|
| 29 |
model = outlines.from_openai(client, MODEL_NAME)
|
|
@@ -49,7 +49,11 @@ def load_data_and_create_vectorstore():
|
|
| 49 |
|
| 50 |
embedding_model = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large-instruct")
|
| 51 |
vectorstore = FAISS.from_documents(documents, embedding_model)
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
print("✅ Data loaded and vectorstore created.")
|
| 54 |
except FileNotFoundError:
|
| 55 |
print(f"β Data file not found at {CSV_PATH}")
|
|
@@ -73,9 +77,9 @@ def ask_llm(question: str, docs: list) -> Meme:
|
|
| 73 |
print(messages)
|
| 74 |
try:
|
| 75 |
meme = model(messages,Meme, stream=False)
|
| 76 |
-
|
| 77 |
meme = Meme.model_validate_json(meme)
|
| 78 |
-
|
| 79 |
return meme
|
| 80 |
except Exception as e:
|
| 81 |
return f"β LLM Error: {e}"
|
|
@@ -92,11 +96,7 @@ def query_memes(user_input: str):
|
|
| 92 |
raw_url = meme.link
|
| 93 |
description = meme.description
|
| 94 |
|
| 95 |
-
|
| 96 |
-
file_id = raw_url.split("/file/d/")[1].split("/")[0]
|
| 97 |
-
embed_url = f"https://drive.google.com/file/d/{file_id}/preview"
|
| 98 |
-
else:
|
| 99 |
-
embed_url = raw_url
|
| 100 |
|
| 101 |
embed_html = f'''
|
| 102 |
<div style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden; border-radius: 8px; box-shadow: 0 4px 10px rgba(0,0,0,0.2); margin-top: 1em;">
|
|
@@ -113,7 +113,7 @@ DATASET_REPO = "oumayma03/moul_lmemes_dataset"
|
|
| 113 |
|
| 114 |
def upload_video_to_dataset(local_path: str) -> str:
|
| 115 |
filename = os.path.basename(local_path)
|
| 116 |
-
remote_path = f"{filename}"
|
| 117 |
uploaded_url = upload_file(
|
| 118 |
path_or_fileobj=local_path,
|
| 119 |
path_in_repo=remote_path,
|
|
@@ -147,11 +147,28 @@ def upload_meme(video_file, label):
|
|
| 147 |
append_to_dataset_repo(video_url, label) # Pass label to the function
|
| 148 |
|
| 149 |
# Reload vectorstore to include new meme
|
| 150 |
-
|
| 151 |
|
| 152 |
return "✅ Meme uploaded successfully with label!"
|
| 153 |
except Exception as e:
|
| 154 |
return f"β Upload failed: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
# --- GRADIO INTERFACE ---
|
| 156 |
if __name__ == "__main__":
|
| 157 |
if retriever is None or client is None:
|
|
|
|
| 5 |
import pandas as pd
|
| 6 |
import spaces
|
| 7 |
from langchain.docstore.document import Document
|
| 8 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 9 |
from langchain_community.vectorstores import FAISS
|
| 10 |
import gradio as gr
|
| 11 |
import os
|
| 12 |
from huggingface_hub import upload_file
|
| 13 |
from datasets import load_dataset, Dataset
|
| 14 |
+
import re
|
| 15 |
|
| 16 |
class Meme(BaseModel):
|
| 17 |
link: str = Field(..., description="The URL of the meme")
|
|
|
|
| 23 |
|
| 24 |
API_KEY=os.getenv("OPENROUTER_API_KEY")
|
| 25 |
HF_key = os.getenv("HUGGINGFACE_KEY")
|
| 26 |
+
retriever = None
|
|
|
|
| 27 |
try:
|
| 28 |
client = openai.OpenAI(base_url="https://openrouter.ai/api/v1", api_key=API_KEY)
|
| 29 |
model = outlines.from_openai(client, MODEL_NAME)
|
|
|
|
| 49 |
|
| 50 |
embedding_model = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large-instruct")
|
| 51 |
vectorstore = FAISS.from_documents(documents, embedding_model)
|
| 52 |
+
try:
|
| 53 |
+
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
|
| 54 |
+
except Exception as e:
|
| 55 |
+
print(f"β Error creating retriever: {e}")
|
| 56 |
+
retriever = None
|
| 57 |
print("✅ Data loaded and vectorstore created.")
|
| 58 |
except FileNotFoundError:
|
| 59 |
print(f"β Data file not found at {CSV_PATH}")
|
|
|
|
| 77 |
print(messages)
|
| 78 |
try:
|
| 79 |
meme = model(messages,Meme, stream=False)
|
| 80 |
+
print(meme)
|
| 81 |
meme = Meme.model_validate_json(meme)
|
| 82 |
+
print(meme)
|
| 83 |
return meme
|
| 84 |
except Exception as e:
|
| 85 |
return f"β LLM Error: {e}"
|
|
|
|
| 96 |
raw_url = meme.link
|
| 97 |
description = meme.description
|
| 98 |
|
| 99 |
+
embed_url = get_embed_url(raw_url)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
embed_html = f'''
|
| 102 |
<div style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden; border-radius: 8px; box-shadow: 0 4px 10px rgba(0,0,0,0.2); margin-top: 1em;">
|
|
|
|
| 113 |
|
| 114 |
def upload_video_to_dataset(local_path: str) -> str:
|
| 115 |
filename = os.path.basename(local_path)
|
| 116 |
+
remote_path = f"data/{filename}"
|
| 117 |
uploaded_url = upload_file(
|
| 118 |
path_or_fileobj=local_path,
|
| 119 |
path_in_repo=remote_path,
|
|
|
|
| 147 |
append_to_dataset_repo(video_url, label) # Pass label to the function
|
| 148 |
|
| 149 |
# Reload vectorstore to include new meme
|
| 150 |
+
load_data_and_create_vectorstore()
|
| 151 |
|
| 152 |
return "✅ Meme uploaded successfully with label!"
|
| 153 |
except Exception as e:
|
| 154 |
return f"β Upload failed: {str(e)}"
|
| 155 |
+
|
| 156 |
+
def get_embed_url(raw_url: str) -> str:
    """Convert a meme link into a URL that can be used as an iframe source.

    Recognised forms:

    * Google Drive file links (``.../file/d/<id>/...``) become
      ``https://drive.google.com/file/d/<id>/preview``.
    * YouTube watch links (``youtube.com/watch?...v=<id>``) and short links
      (``youtu.be/<id>``) become ``https://www.youtube.com/embed/<id>``.

    Any other URL, or a recognised URL from which no ID can be extracted,
    is returned unchanged.

    Args:
        raw_url: The link as stored with the meme.

    Returns:
        The embeddable URL, or ``raw_url`` itself when no rewrite applies.
    """
    # An ID ends at the next path, query, or fragment delimiter.
    id_terminator = r"[/?#&]"

    # Google Drive
    if "drive.google" in raw_url and "/file/d/" in raw_url:
        tail = raw_url.split("/file/d/")[1]
        file_id = re.split(id_terminator, tail, maxsplit=1)[0]
        if file_id:
            return f"https://drive.google.com/file/d/{file_id}/preview"

    # YouTube watch links: `v` may be any query parameter, not just the first.
    if "youtube.com/watch" in raw_url:
        watch_match = re.search(r"[?&]v=([^&#]+)", raw_url)
        if watch_match:
            return f"https://www.youtube.com/embed/{watch_match.group(1)}"

    # YouTube short links
    if "youtu.be/" in raw_url:
        tail = raw_url.split("youtu.be/")[-1]
        video_id = re.split(id_terminator, tail, maxsplit=1)[0]
        if video_id:
            return f"https://www.youtube.com/embed/{video_id}"

    # Default: return as is
    return raw_url
|
| 171 |
+
|
| 172 |
# --- GRADIO INTERFACE ---
|
| 173 |
if __name__ == "__main__":
|
| 174 |
if retriever is None or client is None:
|