Spaces:

fireworks-ai
/

catalog-extract

Running

App Files Files Community

RobertoBarrosoLuque committed on Oct 8

Commit

b32f568

1 Parent(s): b5d7c36

Update frontend with different prompts and cleanup

Browse files

Files changed (4) hide show

configs/prompt_library.yaml +37 -0
src/app.py +36 -48
src/modules/constants.py +7 -0
src/modules/vlm_inference.py +27 -5

configs/prompt_library.yaml ADDED Viewed

	@@ -0,0 +1,37 @@

+concise:
+  system: "You are an expert e-commerce fashion catalog assistant specializing in product classification and data management."
+  user: |
+    Analyze this fashion product image for internal catalog management.
+    Provide classification and a concise, factual description focusing on:
+    - Product type and key identifying features
+    - Essential attributes (color, style, material if visible)
+    Keep the description brief and functional (1-2 sentences maximum).
+descriptive:
+  system: "You are an expert e-commerce fashion copywriter who creates engaging, conversion-focused product descriptions."
+  user: |
+    Analyze this fashion product image for our customer-facing website.
+    Provide classification and a descriptive product description that:
+    - Highlights key features and visual appeal
+    - Uses vivid, engaging language that attracts shoppers
+    - Emphasizes style and benefits
+    - Stays concise (2-3 sentences maximum)
+    Write in an enthusiastic, customer-friendly tone.
+explanatory:
+  system: "You are an expert fashion consultant providing comprehensive product information to customer service representatives."
+  user: |
+    Analyze this fashion product image to help customer service agents assist shoppers.
+    Provide classification and a detailed, comprehensive description that includes:
+    - Complete product features and construction details
+    - Material composition and quality indicators (if visible)
+    - Styling suggestions and outfit pairing ideas
+    - Appropriate occasions and use cases
+    - Care considerations if applicable
+    Use 3-5 sentences. Be thorough and informative to help agents answer any customer questions.

src/app.py CHANGED Viewed

@@ -18,15 +18,24 @@ AVAILABLE_MODELS = {
     "Llama Scout": "accounts/fireworks/models/llama4-scout-instruct-basic",
 }
-EXAMPLE_IMAGES_DIR = Path("data/examples")
 MAX_CONCURRENT_REQUESTS = 10
 FILE_PATH = Path(__file__).parents[1]
 ASSETS_PATH = FILE_PATH / "assets"
 def analyze_single_image(
-    image_input, model_name: str, api_key: Optional[str] = None
 ) -> tuple[str, str, str, str]:
     """
     Process a single product image and return classification results
@@ -35,6 +44,7 @@ def analyze_single_image(
         image_input: PIL Image or file path
         model_name: Selected model name
         api_key: Optional API key override
     Returns:
         tuple: (master_category, gender, sub_category, description)
@@ -53,8 +63,17 @@ def analyze_single_image(
         if api_key is None:
             api_key = os.getenv("FIREWORKS_API_KEY")
         result = analyze_product_image(
-            image_url=img_b64, model=model_id, api_key=api_key, provider="Fireworks"
         )
         # Format results
@@ -75,7 +94,7 @@ def process_batch_dataset(
     model_name: str,
     api_key: Optional[str] = None,
     max_concurrent: int = MAX_CONCURRENT_REQUESTS,
-) -> tuple[pd.DataFrame, str]:
     """
     Process uploaded CSV dataset with product images
@@ -218,14 +237,19 @@ def create_demo_interface():
                 value=list(AVAILABLE_MODELS.keys())[0],
                 label="Select Model",
             )
             api_key_input = gr.Textbox(
                 label="API Key",
                 type="password",
             )
         with gr.Tabs():
-            with gr.TabItem("📸 Single Image Analysis"):
-                gr.Markdown("### Upload a product image for instant classification")
                 with gr.Row():
                     # Left column - Input
@@ -265,7 +289,12 @@ def create_demo_interface():
                 # Wire up single image analysis
                 analyze_btn.click(
                     fn=analyze_single_image,
-                    inputs=[image_input, model_selector, api_key_input],
                     outputs=[
                         master_category_output,
                         gender_output,
@@ -281,47 +310,6 @@ def create_demo_interface():
                     outputs=[image_input],
                 )
-                with gr.Row():
-                    # Left - Upload
-                    with gr.Column(scale=1):
-                        dataset_upload = gr.File(
-                            label="Upload Dataset (CSV)", file_types=[".csv"]
-                        )
-                        concurrent_slider = gr.Slider(
-                            minimum=1,
-                            maximum=50,
-                            value=10,
-                            step=1,
-                            label="Concurrent Requests",
-                            info="Higher = faster but may hit rate limits",
-                        )
-                        process_btn = gr.Button(
-                            "⚡ Process Dataset", variant="primary", size="lg"
-                        )
-                    # Right - Results summary
-                    with gr.Column(scale=1):
-                        summary_output = gr.Textbox(
-                            label="Processing Summary", interactive=False, lines=8
-                        )
-                # Results dataframe
-                results_dataframe = gr.Dataframe(
-                    label="Classification Results", interactive=False, wrap=True
-                )
-                # Wire up batch processing
-                process_btn.click(
-                    fn=process_batch_dataset,
-                    inputs=[
-                        dataset_upload,
-                        model_selector,
-                        api_key_input,
-                        concurrent_slider,
-                    ],
-                    outputs=[results_dataframe, summary_output],
-                )
             # Tab 3: Model Evaluation (show uploaded charts)
             with gr.TabItem("📈 Model Performance"):
                 gr.Markdown(

     "Llama Scout": "accounts/fireworks/models/llama4-scout-instruct-basic",
 }
 MAX_CONCURRENT_REQUESTS = 10
 FILE_PATH = Path(__file__).parents[1]
 ASSETS_PATH = FILE_PATH / "assets"
+# Prompt style display names
+PROMPT_STYLES = {
+    "Data Management": "concise",
+    "Website/Sales": "descriptive",
+    "Customer Support": "explanatory",
+}
 def analyze_single_image(
+    image_input,
+    model_name: str,
+    api_key: Optional[str] = None,
+    prompt_style_display: Optional[str] = None,
 ) -> tuple[str, str, str, str]:
     """
     Process a single product image and return classification results
         image_input: PIL Image or file path
         model_name: Selected model name
         api_key: Optional API key override
+        prompt_style_display: Display name for prompt style (e.g., "Data Management")
     Returns:
         tuple: (master_category, gender, sub_category, description)
         if api_key is None:
             api_key = os.getenv("FIREWORKS_API_KEY")
+        # Map display name to prompt key
+        prompt_style = (
+            PROMPT_STYLES.get(prompt_style_display) if prompt_style_display else None
+        )
         result = analyze_product_image(
+            image_url=img_b64,
+            model=model_id,
+            api_key=api_key,
+            provider="Fireworks",
+            prompt_style=prompt_style,
         )
         # Format results
     model_name: str,
     api_key: Optional[str] = None,
     max_concurrent: int = MAX_CONCURRENT_REQUESTS,
+) -> tuple[Optional[pd.DataFrame], str]:
     """
     Process uploaded CSV dataset with product images
                 value=list(AVAILABLE_MODELS.keys())[0],
                 label="Select Model",
             )
+            prompt_selector = gr.Dropdown(
+                choices=list(PROMPT_STYLES.keys()),
+                value="Website/Sales",
+                label="Description Style",
+            )
             api_key_input = gr.Textbox(
                 label="API Key",
                 type="password",
             )
         with gr.Tabs():
+            with gr.TabItem("📸 Image Analysis 📸 "):
+                gr.Markdown("### Upload a product image or select from table below")
                 with gr.Row():
                     # Left column - Input
                 # Wire up single image analysis
                 analyze_btn.click(
                     fn=analyze_single_image,
+                    inputs=[
+                        image_input,
+                        model_selector,
+                        api_key_input,
+                        prompt_selector,
+                    ],
                     outputs=[
                         master_category_output,
                         gender_output,
                     outputs=[image_input],
                 )
             # Tab 3: Model Evaluation (show uploaded charts)
             with gr.TabItem("📈 Model Performance"):
                 gr.Markdown(

src/modules/constants.py ADDED Viewed

	@@ -0,0 +1,7 @@

+import yaml
+from pathlib import Path
+_PATH_TO_CONFIGS = Path(__file__).parents[2] / "configs" / "prompt_library.yaml"
+with open(_PATH_TO_CONFIGS, "r") as f:
+    PROMPT_LIBRARY = yaml.safe_load(f)

src/modules/vlm_inference.py CHANGED Viewed

@@ -2,9 +2,11 @@ import os
 from openai import OpenAI, AsyncOpenAI
 from pydantic import BaseModel, Field
 from typing import Optional, Literal
 SYSTEM_PROMPT = """
-    You are a fashion product analyst. Classify products and generate detailed descriptions based on images.
     """
 USER_PROMPT = """
     Analyze this fashion product image and provide:
@@ -73,6 +75,7 @@ def analyze_product_image(
     model: str = "accounts/fireworks/models/qwen2p5-vl-72b-instruct",
     api_key: Optional[str] = None,
     provider: str = "Fireworks",
 ) -> ProductClassification:
     """
     Analyze a fashion product image using VLM with structured output
@@ -82,6 +85,7 @@ def analyze_product_image(
         model: Model to use for inference (default: Qwen2.5 VL 72B)
         api_key: Fireworks API key (defaults to FIREWORKS_API_KEY env variable)
         provider: Provider to use for inference (default: Fireworks)
     Returns:
         ProductClassification: Structured classification and description
@@ -98,16 +102,24 @@ def analyze_product_image(
     else:
         raise ValueError(f"Unknown provider: {provider}")
     # Call the API with structured output
     completion = client.beta.chat.completions.parse(
         model=model,
         messages=[
-            {"role": "system", "content": SYSTEM_PROMPT},
             {
                 "role": "user",
                 "content": [
                     {"type": "image_url", "image_url": {"url": image_url}},
-                    {"type": "text", "text": USER_PROMPT},
                 ],
             },
         ],
@@ -123,6 +135,7 @@ async def analyze_product_image_async(
     model: str = "accounts/fireworks/models/qwen2p5-vl-72b-instruct",
     api_key: Optional[str] = None,
     provider: str = "Fireworks",
 ) -> ProductClassification:
     """
     Async version of analyze_product_image for concurrent processing
@@ -132,6 +145,7 @@ async def analyze_product_image_async(
         model: Model to use for inference (default: Qwen2.5 VL 72B)
         api_key: API key (defaults to provider-specific env variable)
         provider: Provider to use for inference (default: Fireworks)
     Returns:
         ProductClassification: Structured classification and description
@@ -148,16 +162,24 @@ async def analyze_product_image_async(
     else:
         raise ValueError(f"Unknown provider: {provider}")
     # Call the API with structured output
     completion = await client.beta.chat.completions.parse(
         model=model,
         messages=[
-            {"role": "system", "content": SYSTEM_PROMPT},
             {
                 "role": "user",
                 "content": [
                     {"type": "image_url", "image_url": {"url": image_url}},
-                    {"type": "text", "text": USER_PROMPT},
                 ],
             },
         ],

 from openai import OpenAI, AsyncOpenAI
 from pydantic import BaseModel, Field
 from typing import Optional, Literal
+from src.modules.constants import PROMPT_LIBRARY
 SYSTEM_PROMPT = """
+    You are an e-commerce fashion catalog assistant.
+    Classify products and generate detailed descriptions based on images.
     """
 USER_PROMPT = """
     Analyze this fashion product image and provide:
     model: str = "accounts/fireworks/models/qwen2p5-vl-72b-instruct",
     api_key: Optional[str] = None,
     provider: str = "Fireworks",
+    prompt_style: Optional[str] = None,
 ) -> ProductClassification:
     """
     Analyze a fashion product image using VLM with structured output
         model: Model to use for inference (default: Qwen2.5 VL 72B)
         api_key: Fireworks API key (defaults to FIREWORKS_API_KEY env variable)
         provider: Provider to use for inference (default: Fireworks)
+        prompt_style: Prompt style from library (concise, descriptive, explanatory). Defaults to fallback prompts.
     Returns:
         ProductClassification: Structured classification and description
     else:
         raise ValueError(f"Unknown provider: {provider}")
+    # Get prompts from library or use defaults
+    if prompt_style and prompt_style in PROMPT_LIBRARY:
+        system_prompt = PROMPT_LIBRARY[prompt_style]["system"]
+        user_prompt = PROMPT_LIBRARY[prompt_style]["user"]
+    else:
+        system_prompt = SYSTEM_PROMPT
+        user_prompt = USER_PROMPT
     # Call the API with structured output
     completion = client.beta.chat.completions.parse(
         model=model,
         messages=[
+            {"role": "system", "content": system_prompt},
             {
                 "role": "user",
                 "content": [
                     {"type": "image_url", "image_url": {"url": image_url}},
+                    {"type": "text", "text": user_prompt},
                 ],
             },
         ],
     model: str = "accounts/fireworks/models/qwen2p5-vl-72b-instruct",
     api_key: Optional[str] = None,
     provider: str = "Fireworks",
+    prompt_style: Optional[str] = None,
 ) -> ProductClassification:
     """
     Async version of analyze_product_image for concurrent processing
         model: Model to use for inference (default: Qwen2.5 VL 72B)
         api_key: API key (defaults to provider-specific env variable)
         provider: Provider to use for inference (default: Fireworks)
+        prompt_style: Prompt style from library (concise, descriptive, explanatory). Defaults to fallback prompts.
     Returns:
         ProductClassification: Structured classification and description
     else:
         raise ValueError(f"Unknown provider: {provider}")
+    # Get prompts from library or use defaults
+    if prompt_style and prompt_style in PROMPT_LIBRARY:
+        system_prompt = PROMPT_LIBRARY[prompt_style]["system"]
+        user_prompt = PROMPT_LIBRARY[prompt_style]["user"]
+    else:
+        system_prompt = SYSTEM_PROMPT
+        user_prompt = USER_PROMPT
     # Call the API with structured output
     completion = await client.beta.chat.completions.parse(
         model=model,
         messages=[
+            {"role": "system", "content": system_prompt},
             {
                 "role": "user",
                 "content": [
                     {"type": "image_url", "image_url": {"url": image_url}},
+                    {"type": "text", "text": user_prompt},
                 ],
             },
         ],