Spaces:

korayaggul
/

QA-Quality-Evaluator

Running

App Files Files Community

korayaggul committed on Sep 24

Commit

f509ff5

verified ·

1 Parent(s): 7983f5c

Update app.py

Browse files

Files changed (1) hide show

app.py +135 -38

app.py CHANGED Viewed

@@ -1,46 +1,143 @@
-import gradio as gr
 import json
 from transformers import pipeline
-# Load the model (built once, by this module-level call)
-quality_model = pipeline("text-classification", model="snorkelai/instruction-response-quality")
-def analyze_json(file):  # score every Q&A record in the uploaded JSON file and return the annotated records as JSON text
     data = json.load(open(file.name))
-    results = []
-    for i, item in enumerate(data if isinstance(data, list) else [data]):
-        question = item.get("question", "")
-        answer = item.get("answer", "")
-        text = f"Q: {question}\nA: {answer}"
-        # Get the quality score from the model
-        pred = quality_model(text, truncation=True)[0]
-        score = round(pred["score"], 3)
-        # Pick the quality level from the score
-        if score > 0.75:
-            quality = "high"
-        elif score > 0.4:
-            quality = "medium"
-        else:
-            quality = "low"
-        # Add the quality entry to the original JSON record
-        item["quality"] = {
-            "label": quality,
-            "score": score
-        }
-        results.append(item)
-    return json.dumps(results, indent=2)
-demo = gr.Interface(  # single-function UI: JSON file in, analyze_json's text result out
-    fn=analyze_json,
-    inputs=gr.File(file_types=[".json"], label="Upload JSON file"),
-    outputs="text",
-    title="Q&A Quality Evaluator"
-)
 if __name__ == "__main__":
     demo.launch()

 import json
+import tempfile
+from typing import List, Dict, Any
+import gradio as gr
 from transformers import pipeline
+# --- Not actually lazy: this module-level call builds the classifier once, at import time (per the original note: when the Space starts)
+quality_clf = pipeline("text-classification", model="snorkelai/instruction-response-quality")
+def score_item(item: Dict[str, Any]) -> Dict[str, Any]:
+    """Score a single QA record and attach a ``quality`` entry to it.
+
+    The record's ``question`` and ``answer`` values (empty string when the key
+    is missing) are formatted into one text and passed to ``quality_clf``. The
+    first prediction's ``score`` is bucketed: above 0.75 is "high", above 0.40
+    is "medium", anything else is "low".
+
+    Args:
+        item: QA record; it is modified in place.
+
+    Returns:
+        The same ``item`` object, with ``item["quality"]`` set to
+        ``{"label": <bucket>, "score": <score rounded to 3 places>}``.
+    """
+    q = item.get("question", "")
+    a = item.get("answer", "")
+    top_prediction = quality_clf(f"Q: {q}\nA: {a}", truncation=True)[0]
+    confidence = float(top_prediction["score"])
+    # Cut-offs are checked strictest first; a score that clears none of them is "low".
+    grade = "low"
+    for cutoff, name in ((0.75, "high"), (0.40, "medium")):
+        if confidence > cutoff:
+            grade = name
+            break
+    item["quality"] = {"label": grade, "score": round(confidence, 3)}
+    return item
+def improve_item(item: Dict[str, Any], target: str = "medium") -> Dict[str, Any]:
+    """Apply light rule-based clean-up to a QA record (no LLM; cheap and free).
+
+    Records whose ``quality`` label is "high" are returned untouched, and so is
+    every record when ``target`` is "none". Otherwise the question and answer
+    are trimmed, given a capital first letter and closing punctuation ("?" for
+    the question, "." for the answer unless it already ends in ".", "!" or
+    "?"), and a non-empty answer of fewer than five words gets a fixed filler
+    sentence appended.
+
+    Args:
+        item: QA record; it is modified in place.
+        target: Pass "none" to disable the clean-up. No other value is
+            inspected by this function.
+
+    Returns:
+        The same ``item`` object, with ``question`` and ``answer`` rewritten
+        when the clean-up ran.
+    """
+    label = item.get("quality", {}).get("label")
+    if label == "high" or target == "none":
+        return item
+
+    def as_text(value: Any) -> str:
+        # None (e.g. a JSON null) and non-string values must not break the
+        # string rules below; score_item already renders them through an f-string.
+        return "" if value is None else str(value).strip()
+
+    # Basic clean-up: whitespace, capitalisation, punctuation.
+    question = as_text(item.get("question"))
+    if question:
+        if not question.endswith("?"):
+            question += "?"
+        if question[0].islower():
+            question = question[0].upper() + question[1:]
+    answer = as_text(item.get("answer"))
+    if answer:
+        if answer[0].islower():
+            answer = answer[0].upper() + answer[1:]
+        if answer[-1] not in ".!?":
+            answer += "."
+        # Minimal padding for very short answers. An empty answer stays empty:
+        # there is nothing to clarify, and padding it would produce a
+        # filler-only answer that starts with a stray space.
+        if len(answer.split()) < 5:
+            answer += " This answer has been clarified for brevity and precision."
+    item["question"] = question
+    item["answer"] = answer
+    return item
+def process_json(
+    file,
+    auto_improve: bool,
+    improve_threshold: str
+):
+    """Score (and optionally clean up) every QA record in an uploaded JSON file.
+
+    Args:
+        file: The upload from the file input: either an object exposing the
+            path as ``.name`` or a plain path string.
+        auto_improve: When true, records whose label is selected by
+            ``improve_threshold`` go through ``improve_item`` and are scored again.
+        improve_threshold: "low_only" or "low_and_medium"; any other value
+            (e.g. "none") selects no records.
+
+    Returns:
+        A ``(summary, pretty, path)`` tuple: one summary dict per record, a JSON
+        preview of the first 50 records, and the path of a temporary ``.json``
+        file holding all scored records.
+
+    Raises:
+        gr.Error: If no file was uploaded.
+    """
+    if file is None:
+        raise gr.Error("Please upload a JSON file first.")
+    # Accept both file-like uploads (path in ``.name``) and plain path strings.
+    path = getattr(file, "name", file)
+    # Load the JSON content (a list or a single object); the context manager
+    # closes the handle, and utf-8-sig also tolerates a leading BOM.
+    with open(path, encoding="utf-8-sig") as fh:
+        data = json.load(fh)
+    items: List[Dict[str, Any]] = data if isinstance(data, list) else [data]
+    # Score shallow copies so the loaded records are not mutated.
+    scored: List[Dict[str, Any]] = [score_item(dict(it)) for it in items]
+    # Improvement is optional.
+    labels_to_improve = {
+        "low_only": ("low",),
+        "low_and_medium": ("low", "medium"),
+    }.get(improve_threshold, ())
+    if auto_improve and labels_to_improve:
+        rescored = []
+        for it in scored:
+            if it.get("quality", {}).get("label", "low") in labels_to_improve:
+                # Score again so the effect of the clean-up is visible.
+                it = score_item(improve_item(it))
+            rescored.append(it)
+        scored = rescored
+    # Compact view for the summary table (falls back to the index when there is no id).
+    # NOTE(review): gr.Dataframe may expect rows as lists rather than dicts - confirm
+    # against the installed Gradio version.
+    summary = [
+        {
+            "id": it.get("id", idx),
+            "quality_label": it["quality"]["label"],
+            "quality_score": it["quality"]["score"],
+            "question_preview": (it.get("question") or "")[:120],
+        }
+        for idx, it in enumerate(scored)
+    ]
+    # Build the downloadable JSON; explicit UTF-8 because ensure_ascii=False
+    # writes non-ASCII characters verbatim.
+    with tempfile.NamedTemporaryFile(
+        delete=False, suffix=".json", mode="w", encoding="utf-8"
+    ) as tmp:
+        json.dump(scored, tmp, indent=2, ensure_ascii=False)
+        out_path = tmp.name
+    # Preview for the Code box: only the first 50 items, to keep large files readable.
+    pretty = json.dumps(scored[:50], indent=2, ensure_ascii=False)
+    if len(scored) > 50:
+        pretty += "\n\n// NOTE: Showing first 50 items. Download full file below."
+    return summary, pretty, out_path
+with gr.Blocks(title="Q&A Quality Evaluator", theme=gr.themes.Soft()) as demo:  # UI: upload + options, a run button, then three result tabs
+    gr.Markdown("## Q&A Quality Evaluator\nUpload your JSON, score quality, and (optionally) auto-improve low items.")
+    with gr.Row():
+        inp_file = gr.File(file_types=[".json"], label="Upload JSON (list of objects)")
+    with gr.Row():
+        auto_switch = gr.Checkbox(label="Auto-improve low-quality items (light rules, no LLM)", value=False)
+        improve_sel = gr.Radio(choices=["low_only", "low_and_medium", "none"], value="low_only",
+                               label="Improve threshold")
+    run_btn = gr.Button("Score (and Improve)")
+    gr.Markdown("### Results")
+    with gr.Tab("Summary Table"):
+        out_table = gr.Dataframe(headers=["id", "quality_label", "quality_score", "question_preview"], wrap=True, height=400)
+    with gr.Tab("Preview JSON"):
+        # A large viewing area: lines=30 keeps the preview comfortable to read
+        out_code = gr.Code(language="json", label="Preview (first 50 items)", interactive=False, lines=30)
+    with gr.Tab("Download"):
+        out_file = gr.File(label="Download full scored JSON")
+    run_btn.click(  # inputs and outputs are listed in the order process_json takes and returns them
+        process_json,
+        inputs=[inp_file, auto_switch, improve_sel],
+        outputs=[out_table, out_code, out_file]
+    )
 if __name__ == "__main__":
     demo.launch()