import json
import tempfile
from typing import Any, Dict, List, Tuple

import gradio as gr
from transformers import pipeline

# Load the classifier once at import time (i.e. when the Space starts) so that
# every request reuses the same model instance.
quality_clf = pipeline("text-classification", model="snorkelai/instruction-response-quality")

# Quality labels that qualify for rule-based improvement, keyed by the value of
# the "Improve threshold" radio. Any other value (e.g. "none") improves nothing.
_IMPROVE_LABELS = {
    "low_only": frozenset({"low"}),
    "low_and_medium": frozenset({"low", "medium"}),
}


def _text_field(item: Dict[str, Any], key: str) -> str:
    """Return ``item[key]`` as text, mapping a missing or null value to ""."""
    value = item.get(key)
    return "" if value is None else str(value)


def score_item(item: Dict[str, Any]) -> Dict[str, Any]:
    """Score a single Q&A record and attach a ``quality`` field to it.

    The record is mutated in place and also returned.

    Args:
        item: Record whose "question" and "answer" entries are scored.

    Returns:
        The same dict, with ``item["quality"]`` set to
        ``{"label": "high" | "medium" | "low", "score": <rounded float>}``.
    """
    question = _text_field(item, "question")
    answer = _text_field(item, "answer")

    pred = quality_clf(f"Q: {question}\nA: {answer}", truncation=True)[0]
    # NOTE(review): assumes pred["score"] is a quality score in [0, 1] --
    # confirm against the model card.
    score = float(pred["score"])

    if score > 0.75:
        label = "high"
    elif score > 0.40:
        label = "medium"
    else:
        label = "low"

    item["quality"] = {"label": label, "score": round(score, 3)}
    return item


def improve_item(item: Dict[str, Any], target: str = "medium") -> Dict[str, Any]:
    """Apply lightweight rule-based clean-up to a record (no LLM involved).

    Records already labelled "high" are returned untouched, as is every record
    when ``target == "none"``. Otherwise the question and answer are trimmed,
    capitalised, given terminal punctuation, and very short answers get a
    fixed clarifying sentence appended. The record is mutated in place.

    Args:
        item: Record to improve; its ``quality`` label is read if present.
        target: Pass "none" to disable improvement entirely.

    Returns:
        The same dict, possibly with rewritten "question" and "answer".
    """
    label = item.get("quality", {}).get("label")
    if label == "high" or target == "none":
        return item

    # Basic clean-up: whitespace, capitalisation, terminal punctuation.
    question = _text_field(item, "question").strip()
    if question and not question.endswith("?"):
        question += "?"
    if question and question[0].islower():
        question = question[0].upper() + question[1:]

    answer = _text_field(item, "answer").strip()
    if answer and answer[0].islower():
        answer = answer[0].upper() + answer[1:]
    if answer and answer[-1] not in ".!?":
        answer += "."

    # Minimal expansion of very short answers.
    if len(answer.split()) < 5:
        answer = answer + " This answer has been clarified for brevity and precision."

    item["question"] = question
    item["answer"] = answer
    return item


def process_json(
    file,
    auto_improve: bool,
    improve_threshold: str
) -> Tuple[List[Dict[str, Any]], str, str]:
    """Score (and optionally improve) every record in an uploaded JSON file.

    Args:
        file: The uploaded file as delivered by ``gr.File``: either an object
            exposing the path as ``.name`` or a plain path string.
        auto_improve: When true, records whose label matches
            ``improve_threshold`` are run through ``improve_item`` and then
            re-scored so the effect is visible.
        improve_threshold: "low_only", "low_and_medium", or anything else to
            improve nothing.

    Returns:
        A tuple ``(summary, preview, path)``: one summary dict per record, a
        pretty-printed JSON preview of the first 50 records, and the path of a
        temporary file holding the full scored JSON.

    Raises:
        gr.Error: If no file was uploaded, the file is not valid JSON, or the
            JSON is not an object / list of objects.
    """
    if file is None:
        raise gr.Error("Please upload a JSON file first.")

    # Accept both tempfile-like objects (``.name``) and plain path strings.
    path = getattr(file, "name", file)
    try:
        # utf-8-sig reads plain UTF-8 and also tolerates a leading BOM.
        with open(path, encoding="utf-8-sig") as fh:
            data = json.load(fh)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        raise gr.Error(f"Could not parse the uploaded file as JSON: {exc}") from exc

    # Both a list of objects and a single object are supported.
    items: List[Dict[str, Any]] = data if isinstance(data, list) else [data]
    if not all(isinstance(it, dict) for it in items):
        raise gr.Error("Expected a JSON object or a list of JSON objects.")

    # Score shallow copies so the parsed input is left untouched.
    scored: List[Dict[str, Any]] = [score_item(dict(it)) for it in items]

    # Optional improvement pass.
    if auto_improve:
        targets = _IMPROVE_LABELS.get(improve_threshold, frozenset())
        scored = [
            # Re-score after improving so the difference shows up.
            score_item(improve_item(it)) if it["quality"]["label"] in targets else it
            for it in scored
        ]

    # Compact view for the summary table (falls back to the index when a
    # record has no "id").
    # NOTE(review): gr.Dataframe is documented to take list-of-lists /
    # DataFrame values; confirm a list of dicts renders as intended with the
    # pinned Gradio version.
    summary = [
        {
            "id": it.get("id", idx),
            "quality_label": it["quality"]["label"],
            "quality_score": it["quality"]["score"],
            "question_preview": (it.get("question") or "")[:120]
        }
        for idx, it in enumerate(scored)
    ]

    # Write the downloadable JSON. delete=False keeps the file on disk after
    # the handle is closed so Gradio can serve it; the explicit encoding is
    # needed because ensure_ascii=False emits non-ASCII characters verbatim.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".json", mode="w", encoding="utf-8"
    ) as tmp:
        json.dump(scored, tmp, indent=2, ensure_ascii=False)
        download_path = tmp.name

    # Preview only the first 50 items so the Code box stays responsive.
    pretty = json.dumps(scored[:50], indent=2, ensure_ascii=False)
    if len(scored) > 50:
        pretty += "\n\n// NOTE: Showing first 50 items. Download full file below."

    return summary, pretty, download_path


with gr.Blocks(title="Q&A Quality Evaluator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## Q&A Quality Evaluator\nUpload your JSON, score quality, and (optionally) auto-improve low items.")

    with gr.Row():
        inp_file = gr.File(file_types=[".json"], label="Upload JSON (list of objects)")
    with gr.Row():
        auto_switch = gr.Checkbox(label="Auto-improve low-quality items (light rules, no LLM)", value=False)
        improve_sel = gr.Radio(choices=["low_only", "low_and_medium", "none"], value="low_only", label="Improve threshold")

    run_btn = gr.Button("Score (and Improve)")

    gr.Markdown("### Results")
    with gr.Tab("Summary Table"):
        out_table = gr.Dataframe(headers=["id", "quality_label", "quality_score", "question_preview"], wrap=True, height=400)
    with gr.Tab("Preview JSON"):
        # A tall box (lines=30) makes the JSON comfortable to read.
        out_code = gr.Code(language="json", label="Preview (first 50 items)", interactive=False, lines=30)
    with gr.Tab("Download"):
        out_file = gr.File(label="Download full scored JSON")

    run_btn.click(
        process_json,
        inputs=[inp_file, auto_switch, improve_sel],
        outputs=[out_table, out_code, out_file]
    )

if __name__ == "__main__":
    demo.launch()