"""
Script to generate evaluation results CSV for all model predictions
"""
from pathlib import Path

import pandas as pd

from src.modules.evals import evaluate_all_categories, extract_metrics
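
# NOTE: from their usage below, the imported helpers are assumed to behave as
# follows (see src/modules/evals for the actual implementations):
#   - evaluate_all_categories(...) joins predictions to ground truth on
#     `id_col` and scores each listed category.
#   - extract_metrics(results, model_name) flattens those results into a
#     list of per-category metric records tagged with the model name.
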
DATA_PATH = Path(__file__).parent / "data"

# Load test.csv (ground truth)
test_df = pd.read_csv(DATA_PATH / "test.csv")

# Define model prediction files and their display names
model_files = {
"Qwen2.5-VL-32B-BASE": "df_pred_FireworksAI_qwen2p5-vl-32b-instruct-ralh0ben.csv",
"Qwen2-VL-72B-BASE": "df_pred_FireworksAI_qwen2-vl-72b-BASE-instruct-yaxztv7t.csv",
"Qwen2-VL-72B-SFT": "df_pred_FireworksAI_qwen-72b-SFT-fashion-catalog-oueqouqs.csv",
"GPT-5-Mini": "df_pred_OpenAI_gpt-5-mini-2025-08-07.csv",
}
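
# To evaluate a new model, add its display name and prediction filename here;
# files missing from DATA_PATH are skipped with a warning in the loop below.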
# Collect all metrics
all_metrics = []
for model_name, filename in model_files.items():
    pred_file = DATA_PATH / filename
    if not pred_file.exists():
        print(f"Warning: {filename} not found, skipping...")
        continue

    print(f"\nEvaluating {model_name}...")
    print("=" * 60)

    # Load predictions
    pred_df = pd.read_csv(pred_file)

    # Evaluate all categories
    results = evaluate_all_categories(
        df_ground_truth=test_df,
        df_predictions=pred_df,
        id_col="id",
        categories=["masterCategory", "gender", "subCategory"],
    )

    # Extract metrics for this model
    model_metrics = extract_metrics(results, model_name)
    all_metrics.extend(model_metrics)
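
# all_metrics is now a flat list of metric records (one dict per
# model/category metric), which pandas turns into one row each.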

# Create DataFrame with all metrics
metrics_df = pd.DataFrame(all_metrics)

# Save to CSV
output_file = DATA_PATH / "evaluation_results.csv"
metrics_df.to_csv(output_file, index=False)

print(f"\n{'=' * 60}")
print(f"Evaluation complete! Results saved to: {output_file}")
print(f"{'=' * 60}")
print("\nSummary:")
print(metrics_df.to_string(index=False))