"""Build one CSV of evaluation metrics covering every model's predictions.

Ground truth is read from ``test.csv`` in the ``data`` directory that sits
next to this file.  Each configured prediction CSV found in that directory is
passed, together with the ground truth, to ``evaluate_all_categories``; the
result is turned into rows by ``extract_metrics``.  The rows from all models
are written to ``evaluation_results.csv`` in the same directory and echoed to
stdout.  Prediction files that do not exist are reported and skipped.
"""

from pathlib import Path

import pandas as pd

from src.modules.evals import evaluate_all_categories, extract_metrics

# Directory holding the ground truth, the prediction files and the output CSV.
DATA_PATH = Path(__file__).parent / "data"

# Columns handed to the evaluator; copied into a fresh list on every call.
EVAL_CATEGORIES = ("masterCategory", "gender", "subCategory")

# Ground-truth labels shared by every model evaluation.
test_df = pd.read_csv(DATA_PATH / "test.csv")

# Display name -> prediction CSV file name (relative to DATA_PATH).
model_files = {
    "Qwen2.5-VL-32B-BASE": "df_pred_FireworksAI_qwen2p5-vl-32b-instruct-ralh0ben.csv",
    "Qwen2.5-VL-32B-SFT": "df_pred_FireworksAI_qwen-32b-SFT-fashion-catalog-c6fhxibo.csv",
    "Qwen2-VL-72B-BASE": "df_pred_FireworksAI_qwen2-vl-72b-BASE-instruct-yaxztv7t.csv",
    "Qwen2-VL-72B-SFT": "df_pred_FireworksAI_qwen-72b-SFT-fashion-catalog-oueqouqs.csv",
    "GPT-5-Mini": "df_pred_OpenAI_gpt-5-mini-2025-08-07.csv",
}

# Metric rows accumulated across all models that could be evaluated.
all_metrics = []

for model_name, filename in model_files.items():
    predictions_path = DATA_PATH / filename

    if predictions_path.exists():
        print(f"\nEvaluating {model_name}...")
        print("=" * 60)

        predictions = pd.read_csv(predictions_path)

        evaluation = evaluate_all_categories(
            df_ground_truth=test_df,
            df_predictions=predictions,
            id_col="id",
            categories=list(EVAL_CATEGORIES),
        )

        # In-place concatenation keeps appending to the same list object.
        all_metrics += extract_metrics(evaluation, model_name)
    else:
        # A missing prediction file is not fatal; move on to the next model.
        print(f"Warning: {filename} not found, skipping...")

# One table with the rows of every evaluated model.
metrics_df = pd.DataFrame(all_metrics)

output_file = DATA_PATH / "evaluation_results.csv"
metrics_df.to_csv(output_file, index=False)

print(f"\n{'=' * 60}")
print(f"Evaluation complete! Results saved to: {output_file}")
print(f"{'=' * 60}")
print("\nSummary:")
print(metrics_df.to_string(index=False))